Merge branch 'master' into s3_zero_copy_replication

commit f833501c77 by Anton Ivashkin, 2020-10-22 11:17:21 +03:00
1689 changed files with 45395 additions and 13083 deletions

.gitignore

@ -118,6 +118,7 @@ website/package-lock.json
# clangd cache
/.clangd
/.cache
/compile_commands.json


@ -17,4 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse online meetup (in Russian)](https://clck.ru/R2zB9) on October 1, 2020.
* [ClickHouse virtual office hours](https://www.eventbrite.com/e/clickhouse-october-virtual-meetup-office-hours-tickets-123129500651) on October 22, 2020.


@ -16,19 +16,6 @@ void trim(String & s)
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end());
}
// Uses a separate replxx::Replxx instance to avoid re-loading the history in
// the current context (replxx::Replxx::history_load() would re-load the
// history from the file), since it would then overlap with the history from
// the current session (this makes the behavior compatible with other
// interpreters, e.g. bash).
void history_save(const String & history_file_path, const String & line)
{
replxx::Replxx rx_no_overlap;
rx_no_overlap.history_load(history_file_path);
rx_no_overlap.history_add(line);
rx_no_overlap.history_save(history_file_path);
}
}
ReplxxLineReader::ReplxxLineReader(
@ -58,7 +45,10 @@ ReplxxLineReader::ReplxxLineReader(
}
else
{
rx.history_load(history_file_path);
if (!rx.history_load(history_file_path))
{
rx.print("Loading history failed: %s\n", strerror(errno));
}
if (flock(history_file_fd, LOCK_UN))
{
@ -128,7 +118,8 @@ void ReplxxLineReader::addToHistory(const String & line)
rx.history_add(line);
// flush changes to the disk
history_save(history_file_path, line);
if (!rx.history_save(history_file_path))
rx.print("Saving history failed: %s\n", strerror(errno));
if (locked && 0 != flock(history_file_fd, LOCK_UN))
rx.print("Unlock of history file failed: %s\n", strerror(errno));


@ -313,13 +313,4 @@ namespace ZeroTraits
}
inline bool operator==(StringRef lhs, const char * rhs)
{
for (size_t pos = 0; pos < lhs.size; ++pos)
if (!rhs[pos] || lhs.data[pos] != rhs[pos])
return false;
return true;
}
std::ostream & operator<<(std::ostream & os, const StringRef & str);


@ -1,3 +1,5 @@
#pragma once
#include <string>
#include <common/types.h>


@ -1,6 +1,9 @@
/// Original is here https://github.com/cerevra/int
#pragma once
/// Original is here https://github.com/cerevra/int
/// Distributed under the Boost Software License, Version 1.0.
/// (See at http://www.boost.org/LICENSE_1_0.txt)
#include "throwError.h"
namespace wide


@ -1,3 +1,5 @@
#pragma once
#include <optional>
#include <string>
#include <Poco/AutoPtr.h>


@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54441)
SET(VERSION_REVISION 54442)
SET(VERSION_MAJOR 20)
SET(VERSION_MINOR 10)
SET(VERSION_MINOR 11)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 11a247d2f42010c1a17bf678c3e00a4bc89b23f8)
SET(VERSION_DESCRIBE v20.10.1.1-prestable)
SET(VERSION_STRING 20.10.1.1)
SET(VERSION_GITHASH 76a04fb4b4f6cd27ad999baf6dc9a25e88851c42)
SET(VERSION_DESCRIBE v20.11.1.1-prestable)
SET(VERSION_STRING 20.11.1.1)
# end of autochange


@ -26,8 +26,8 @@ if (NOT USE_INTERNAL_ODBC_LIBRARY)
find_path (INCLUDE_ODBC sql.h)
if(LIBRARY_ODBC AND INCLUDE_ODBC)
add_library (unixodbc UNKNOWN IMPORTED)
set_target_properties (unixodbc PROPERTIES IMPORTED_LOCATION ${LIBRARY_ODBC})
add_library (unixodbc INTERFACE)
set_target_properties (unixodbc PROPERTIES INTERFACE_LINK_LIBRARIES ${LIBRARY_ODBC})
set_target_properties (unixodbc PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_ODBC})
set_target_properties (unixodbc PROPERTIES INTERFACE_COMPILE_DEFINITIONS USE_ODBC=1)


@ -57,8 +57,8 @@ if (SANITIZE)
endif ()
elseif (SANITIZE STREQUAL "undefined")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
endif()


@ -15,6 +15,10 @@ if (COMPILER_GCC)
elseif (COMPILER_CLANG)
# Require minimum version of clang/apple-clang
if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
# If you are developer you can figure out what exact versions of AppleClang are Ok,
# remove the following line and commit changes below.
message (FATAL_ERROR "AppleClang is not supported, you should install clang from brew.")
# AppleClang 10.0.1 (Xcode 10.2) corresponds to LLVM/Clang upstream version 7.0.0
# AppleClang 11.0.0 (Xcode 11.0) corresponds to LLVM/Clang upstream version 8.0.0
set (XCODE_MINIMUM_VERSION 10.2)


@ -58,7 +58,10 @@ if (COMPILER_CLANG)
add_warning(unused-exception-parameter)
add_warning(unused-macros)
add_warning(unused-member-function)
# XXX: libstdc++ has some of these for 3way compare
if (USE_LIBCXX)
add_warning(zero-as-null-pointer-constant)
endif()
if (WEVERYTHING)
add_warning(everything)
@ -89,6 +92,11 @@ if (COMPILER_CLANG)
no_warning(weak-template-vtables)
no_warning(weak-vtables)
# XXX: libstdc++ has some of these for 3way compare
if (NOT USE_LIBCXX)
no_warning(zero-as-null-pointer-constant)
endif()
# TODO Enable conversion, sign-conversion, double-promotion warnings.
endif ()
elseif (COMPILER_GCC)
@ -170,6 +178,11 @@ elseif (COMPILER_GCC)
add_cxx_compile_options(-Wunused)
# Warn if vector operation is not implemented via SIMD capabilities of the architecture
add_cxx_compile_options(-Wvector-operation-performance)
# XXX: libstdc++ has some of these for 3way compare
if (USE_LIBCXX)
# Warn when a literal 0 is used as null pointer constant.
add_cxx_compile_options(-Wzero-as-null-pointer-constant)
endif()
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10)
# XXX: gcc10 stuck with this option while compiling GatherUtils code


@ -26,8 +26,8 @@ if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY)
if (LIBRARY_HYPERSCAN AND INCLUDE_HYPERSCAN)
set (EXTERNAL_HYPERSCAN_LIBRARY_FOUND 1)
add_library (hyperscan UNKNOWN IMPORTED GLOBAL)
set_target_properties (hyperscan PROPERTIES IMPORTED_LOCATION ${LIBRARY_HYPERSCAN})
add_library (hyperscan INTERFACE)
set_target_properties (hyperscan PROPERTIES INTERFACE_LINK_LIBRARIES ${LIBRARY_HYPERSCAN})
set_target_properties (hyperscan PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_HYPERSCAN})
set_property(TARGET hyperscan APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_HYPERSCAN=1)
else ()

contrib/jemalloc

@ -1 +1 @@
Subproject commit 026764f19995c53583ab25a3b9c06a2fd74e4689
Subproject commit 93e27e435cac846028da20cd9b0841fbc9110bd2


@ -9,10 +9,6 @@ else()
endif ()
if (NOT ENABLE_JEMALLOC)
if(USE_INTERNAL_JEMALLOC_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal jemalloc with ENABLE_JEMALLOC=OFF")
endif()
add_library(jemalloc INTERFACE)
target_compile_definitions(jemalloc INTERFACE USE_JEMALLOC=0)
@ -24,50 +20,7 @@ if (NOT OS_LINUX)
message (WARNING "jemalloc support on non-linux is EXPERIMENTAL")
endif()
option (USE_INTERNAL_JEMALLOC_LIBRARY "Use internal jemalloc library" ${NOT_UNBUNDLED})
if (NOT USE_INTERNAL_JEMALLOC_LIBRARY)
find_library(LIBRARY_JEMALLOC jemalloc)
find_path(INCLUDE_JEMALLOC jemalloc/jemalloc.h)
if (LIBRARY_JEMALLOC AND INCLUDE_JEMALLOC)
set(EXTERNAL_JEMALLOC_LIBRARY_FOUND 1)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
set (CMAKE_REQUIRED_LIBRARIES ${LIBRARY_JEMALLOC} Threads::Threads "dl")
set (CMAKE_REQUIRED_INCLUDES ${INCLUDE_JEMALLOC})
check_cxx_source_compiles (
"
#include <jemalloc/jemalloc.h>
int main() {
free(mallocx(1, 0));
}
"
EXTERNAL_JEMALLOC_LIBRARY_WORKS
)
if (EXTERNAL_JEMALLOC_LIBRARY_WORKS)
add_library (jemalloc STATIC IMPORTED)
set_property (TARGET jemalloc PROPERTY IMPORTED_LOCATION ${LIBRARY_JEMALLOC})
set_property (TARGET jemalloc PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_JEMALLOC})
set_property (TARGET jemalloc PROPERTY INTERFACE_LINK_LIBRARIES Threads::Threads dl)
else()
message (${RECONFIGURE_MESSAGE_LEVEL} "External jemalloc is unusable: ${LIBRARY_JEMALLOC} ${INCLUDE_JEMALLOC}")
endif ()
else()
set(EXTERNAL_JEMALLOC_LIBRARY_FOUND 0)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system jemalloc")
endif()
endif ()
if (NOT EXTERNAL_JEMALLOC_LIBRARY_FOUND OR NOT EXTERNAL_JEMALLOC_LIBRARY_WORKS)
set(USE_INTERNAL_JEMALLOC_LIBRARY 1)
if (OS_LINUX)
# ThreadPool selects jobs randomly, and there can be some threads that have
# performed a memory-heavy task before and will be inactive for some time,
# but until they become active again, the memory will not be freed, since by
@ -82,20 +35,20 @@ if (NOT EXTERNAL_JEMALLOC_LIBRARY_FOUND OR NOT EXTERNAL_JEMALLOC_LIBRARY_WORKS)
# MADV_DONTNEED. See
# https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:10000")
else()
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:10000")
endif()
# CACHE variable is empty, to allow changing defaults without necessity
# to purge cache
set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" )
if (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE)
set (JEMALLOC_CONFIG_MALLOC_CONF "${JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE}")
endif()
message (STATUS "jemalloc malloc_conf: ${JEMALLOC_CONFIG_MALLOC_CONF}")
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc")
set (SRCS
${LIBRARY_DIR}/src/arena.c
${LIBRARY_DIR}/src/background_thread.c
${LIBRARY_DIR}/src/base.c
@ -129,58 +82,55 @@
${LIBRARY_DIR}/src/tsd.c
${LIBRARY_DIR}/src/witness.c
${LIBRARY_DIR}/src/safety_check.c
)
if (OS_DARWIN)
list(APPEND SRCS ${LIBRARY_DIR}/src/zone.c)
endif ()
add_library(jemalloc ${SRCS})
target_include_directories(jemalloc PRIVATE ${LIBRARY_DIR}/include)
target_include_directories(jemalloc SYSTEM PUBLIC include)
set (JEMALLOC_INCLUDE_PREFIX)
# OS_
if (OS_LINUX)
set (JEMALLOC_INCLUDE_PREFIX "include_linux")
elseif (OS_FREEBSD)
set (JEMALLOC_INCLUDE_PREFIX "include_freebsd")
elseif (OS_DARWIN)
set (JEMALLOC_INCLUDE_PREFIX "include_darwin")
else ()
message (FATAL_ERROR "internal jemalloc: This OS is not supported")
endif ()
# ARCH_
if (ARCH_AMD64)
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64")
elseif (ARCH_ARM)
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_aarch64")
else ()
message (FATAL_ERROR "internal jemalloc: This arch is not supported")
endif ()
configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in
${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h)
target_include_directories(jemalloc SYSTEM PRIVATE
${CMAKE_CURRENT_BINARY_DIR}/${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal)
target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE)
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_DEBUG=1 -DJEMALLOC_PROF=1)
if (USE_UNWIND)
target_compile_definitions (jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1)
target_link_libraries (jemalloc PRIVATE unwind)
endif ()
endif ()
target_compile_options(jemalloc PRIVATE -Wno-redundant-decls)
# for RTLD_NEXT
target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE)
set (USE_INTERNAL_JEMALLOC_LIBRARY 1)
endif ()
set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1)
if (MAKE_STATIC_LIBRARIES)
# To detect whether we need to register jemalloc for osx as default zone.

contrib/libhdfs3

@ -1 +1 @@
Subproject commit 24b058c356794ef6cc2d31323dc9adf0386652ff
Subproject commit 30552ac527f2c14070d834e171493b2e7f662375


@ -6,8 +6,8 @@ if (NOT USE_INTERNAL_LZ4_LIBRARY)
if (LIBRARY_LZ4 AND INCLUDE_LZ4)
set(EXTERNAL_LZ4_LIBRARY_FOUND 1)
add_library (lz4 UNKNOWN IMPORTED)
set_property (TARGET lz4 PROPERTY IMPORTED_LOCATION ${LIBRARY_LZ4})
add_library (lz4 INTERFACE)
set_property (TARGET lz4 PROPERTY INTERFACE_LINK_LIBRARIES ${LIBRARY_LZ4})
set_property (TARGET lz4 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_LZ4})
set_property (TARGET lz4 APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_XXHASH=0)
else()

contrib/openssl

@ -1 +1 @@
Subproject commit 07e9623064508d15dd61367f960ebe7fc9aecd77
Subproject commit 237260dd6a4bca5cb5a321d366a8a9c807957455

contrib/replxx

@ -1 +1 @@
Subproject commit 94b1f568d16183214d26c7c0e9ce69a4ce407f65
Subproject commit 8cf626c04e9a74313fb0b474cdbe2297c0f3cdc8

debian/changelog

@ -1,5 +1,5 @@
clickhouse (20.10.1.1) unstable; urgency=low
clickhouse (20.11.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Tue, 08 Sep 2020 17:04:39 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Sat, 10 Oct 2020 18:39:55 +0300


@ -153,82 +153,19 @@ initdb()
start()
{
[ -x $CLICKHOUSE_BINDIR/$PROGRAM ] || exit 0
local EXIT_STATUS
EXIT_STATUS=0
echo -n "Start $PROGRAM service: "
if is_running; then
echo -n "already running "
EXIT_STATUS=1
else
ulimit -n 262144
mkdir -p $CLICKHOUSE_PIDDIR
chown -R $CLICKHOUSE_USER:$CLICKHOUSE_GROUP $CLICKHOUSE_PIDDIR
initdb
if ! is_running; then
# Lock should not be held while running the child process, so we release the lock. Note: obviously, there is a race condition.
# But clickhouse-server has protection from simultaneous runs with the same data directory.
su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$CLICKHOUSE_BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\""
EXIT_STATUS=$?
if [ $EXIT_STATUS -ne 0 ]; then
return $EXIT_STATUS
fi
fi
fi
if [ $EXIT_STATUS -eq 0 ]; then
attempts=0
while ! is_running && [ $attempts -le ${CLICKHOUSE_START_TIMEOUT:=10} ]; do
attempts=$(($attempts + 1))
sleep 1
done
if is_running; then
echo "DONE"
else
echo "UNKNOWN"
fi
else
echo "FAILED"
fi
return $EXIT_STATUS
${CLICKHOUSE_GENERIC_PROGRAM} start --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
stop()
{
#local EXIT_STATUS
EXIT_STATUS=0
if [ -f $CLICKHOUSE_PIDFILE ]; then
echo -n "Stop $PROGRAM service: "
kill -TERM $(cat "$CLICKHOUSE_PIDFILE")
if ! wait_for_done ${CLICKHOUSE_STOP_TIMEOUT}; then
EXIT_STATUS=2
echo "TIMEOUT"
else
echo "DONE"
fi
fi
return $EXIT_STATUS
${CLICKHOUSE_GENERIC_PROGRAM} stop --pid-path "${CLICKHOUSE_PIDDIR}"
}
restart()
{
check_config
if stop; then
if start; then
return 0
fi
fi
return 1
${CLICKHOUSE_GENERIC_PROGRAM} restart --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}


@ -2,6 +2,7 @@
set -e
# set -x
PROGRAM=clickhouse-server
CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse}
CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}}
# Please note that we don't support paths with whitespaces. This is rather ignorant.
@ -12,6 +13,7 @@ CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
[ -f /etc/default/clickhouse ] && . /etc/default/clickhouse
@ -41,105 +43,5 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
fi
fi
# Make sure the administrative user exists
if ! getent passwd ${CLICKHOUSE_USER} > /dev/null; then
if [ -n "$not_deb_os" ]; then
useradd -r -s /bin/false --home-dir /nonexistent ${CLICKHOUSE_USER} > /dev/null
else
adduser --system --disabled-login --no-create-home --home /nonexistent \
--shell /bin/false --group --gecos "ClickHouse server" ${CLICKHOUSE_USER} > /dev/null
fi
fi
# if the user was created manually, make sure the group is there as well
if ! getent group ${CLICKHOUSE_GROUP} > /dev/null; then
groupadd -r ${CLICKHOUSE_GROUP} > /dev/null
fi
# make sure user is in the correct group
if ! id -Gn ${CLICKHOUSE_USER} | grep -qw ${CLICKHOUSE_USER}; then
usermod -a -G ${CLICKHOUSE_GROUP} ${CLICKHOUSE_USER} > /dev/null
fi
# check validity of user and group
if [ "$(id -u ${CLICKHOUSE_USER})" -eq 0 ]; then
echo "The ${CLICKHOUSE_USER} system user must not have uid 0 (root).
Please fix this and reinstall this package." >&2
exit 1
fi
if [ "$(id -g ${CLICKHOUSE_GROUP})" -eq 0 ]; then
echo "The ${CLICKHOUSE_USER} system user must not have root as primary group.
Please fix this and reinstall this package." >&2
exit 1
fi
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ] && [ -f "$CLICKHOUSE_CONFIG" ]; then
if [ -z "$SHELL" ]; then
SHELL="/bin/sh"
fi
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") ||:
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
fi
CLICKHOUSE_DATADIR_FROM_CONFIG=${CLICKHOUSE_DATADIR_FROM_CONFIG:=$CLICKHOUSE_DATADIR}
if [ ! -d ${CLICKHOUSE_DATADIR_FROM_CONFIG} ]; then
mkdir -p ${CLICKHOUSE_DATADIR_FROM_CONFIG}
chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR_FROM_CONFIG}
chmod 700 ${CLICKHOUSE_DATADIR_FROM_CONFIG}
fi
if [ -d ${CLICKHOUSE_CONFDIR} ]; then
mkdir -p ${CLICKHOUSE_CONFDIR}/users.d
mkdir -p ${CLICKHOUSE_CONFDIR}/config.d
rm -fv ${CLICKHOUSE_CONFDIR}/*-preprocessed.xml ||:
fi
[ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR_FROM_CONFIG}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||:
if [ ! -d ${CLICKHOUSE_LOGDIR} ]; then
mkdir -p ${CLICKHOUSE_LOGDIR}
chown root:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
# Allow everyone to read logs, root and clickhouse to read-write
chmod 775 ${CLICKHOUSE_LOGDIR}
fi
# Set net_admin capabilities to support introspection of "taskstats" performance metrics from the kernel
# and ipc_lock capabilities to allow mlock of clickhouse binary.
# 1. Check that "setcap" tool exists.
# 2. Check that an arbitrary program with installed capabilities can run.
# 3. Set the capabilities.
# The second is important for Docker and systemd-nspawn.
# When the container has no capabilities,
# but the executable file inside the container has capabilities,
# then attempt to run this file will end up with a cryptic "Operation not permitted" message.
TMPFILE=/tmp/test_setcap.sh
command -v setcap >/dev/null \
&& echo > $TMPFILE && chmod a+x $TMPFILE && $TMPFILE && setcap "cap_net_admin,cap_ipc_lock,cap_sys_nice+ep" $TMPFILE && $TMPFILE && rm $TMPFILE \
&& setcap "cap_net_admin,cap_ipc_lock,cap_sys_nice+ep" "${CLICKHOUSE_BINDIR}/${CLICKHOUSE_GENERIC_PROGRAM}" \
|| echo "Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary. This is optional. Taskstats accounting will be disabled. To enable taskstats accounting you may add the required capability later manually."
# Clean old dynamic compilation results
if [ -d "${CLICKHOUSE_DATADIR_FROM_CONFIG}/build" ]; then
rm -f ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.cpp ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.so ||:
fi
if [ -f /usr/share/debconf/confmodule ]; then
db_get clickhouse-server/default-password
defaultpassword="$RET"
if [ -n "$defaultpassword" ]; then
echo "<yandex><users><default><password>$defaultpassword</password></default></users></yandex>" > ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
chmod 600 ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
fi
# everything went well, so now let's reset the password
db_set clickhouse-server/default-password ""
# ... done with debconf here
db_stop
fi
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
fi


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.10.1.*
ARG version=20.11.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \


@ -31,14 +31,10 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \
&& chmod +x dpkg-deb \
&& cp dpkg-deb /usr/bin
ENV APACHE_PUBKEY_HASH="bba6987b63c63f710fd4ed476121c588bc3812e99659d27a855f8c4d312783ee66ad6adfce238765691b04d62fa3688f"
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& wget -nv -O /tmp/arrow-keyring.deb "https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-${CODENAME}.deb" \
&& echo "${APACHE_PUBKEY_HASH} /tmp/arrow-keyring.deb" | sha384sum -c \
&& dpkg -i /tmp/arrow-keyring.deb
# Libraries from OS are only needed to test the "unbundled" build (this is not used in production).
RUN apt-get update \
&& apt-get install \


@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.10.1.*
ARG version=20.11.1.*
ARG gosu_ver=1.10
RUN apt-get update \


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.10.1.*
ARG version=20.11.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \


@ -52,6 +52,7 @@ RUN apt-get update \
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment; \
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
echo "MSAN_OPTIONS='abort_on_error=1'" >> /etc/environment; \
echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment; \
ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match)


@ -56,7 +56,6 @@ RUN apt-get update \
python3-lxml \
python3-requests \
python3-termcolor \
qemu-user-static \
rename \
software-properties-common \
tzdata \


@ -33,6 +33,12 @@ server_pid=none
function stop_server
{
if ! kill -0 -- "$server_pid"
then
echo "ClickHouse server pid '$server_pid' is not running"
return 0
fi
for _ in {1..60}
do
if ! pkill -f "clickhouse-server" && ! kill -- "$server_pid" ; then break ; fi
@ -168,8 +174,8 @@ clickhouse-test --help
mkdir -p "$FASTTEST_DATA"{,/client-config}
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config"
cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d"
# doesn't support SSL
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
}
@ -185,63 +191,67 @@ stop_server ||:
start_server
TESTS_TO_SKIP=(
parquet
avro
h3
odbc
mysql
sha256
_orc_
arrow
01098_temporary_and_external_tables
01083_expressions_in_engine_arguments
hdfs
00911_tautological_compare
protobuf
capnproto
java_hash
hashing
secure
00490_special_line_separators_and_characters_outside_of_bmp
00436_convert_charset
00105_shard_collations
01354_order_by_tuple_collate_const
01292_create_user
01098_msgpack_format
00929_multi_match_edit_distance
00926_multimatch
00834_cancel_http_readonly_queries_on_client_close
brotli
parallel_alter
00109_shard_totals_after_having
00110_external_sort
00302_http_compression
00417_kill_query
01294_lazy_database_concurrent
01193_metadata_loading
base64
01031_mutations_interpreter_and_context
json
client
01305_replica_create_drop_zookeeper
01092_memory_profiler
01355_ilike
01281_unsucceeded_insert_select_queries_counter
live_view
limit_memory
memory_limit
memory_leak
00110_external_sort
00436_convert_charset
00490_special_line_separators_and_characters_outside_of_bmp
00652_replicated_mutations_zookeeper
00682_empty_parts_merge
00701_rollup
00109_shard_totals_after_having
ddl_dictionaries
00834_cancel_http_readonly_queries_on_client_close
00911_tautological_compare
00926_multimatch
00929_multi_match_edit_distance
01031_mutations_interpreter_and_context
01053_ssd_dictionary # this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled
01083_expressions_in_engine_arguments
01092_memory_profiler
01098_msgpack_format
01098_temporary_and_external_tables
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- inconvenient when running locally
01193_metadata_loading
01238_http_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
01251_dict_is_in_infinite_loop
01259_dictionary_custom_settings_ddl
01268_dictionary_direct_layout
01280_ssd_complex_key_dictionary
00652_replicated_mutations_zookeeper
01411_bayesian_ab_testing
01238_http_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere another queries running concurrently
01318_encrypt # Depends on OpenSSL
01318_decrypt # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent
01305_replica_create_drop_zookeeper
01354_order_by_tuple_collate_const
01355_ilike
01411_bayesian_ab_testing
_orc_
arrow
avro
base64
brotli
capnproto
client
ddl_dictionaries
h3
hashing
hdfs
java_hash
json
limit_memory
live_view
memory_leak
memory_limit
mysql
odbc
parallel_alter
parquet
protobuf
secure
sha256
# Not sure why these two fail even in sequential mode. Disabled for now
# to make some progress.
@ -252,7 +262,7 @@ TESTS_TO_SKIP=(
01460_DistributedFilesToInsert
)
time clickhouse-test -j 8 --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
# substr is to remove semicolon after test name
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
@ -269,13 +279,13 @@ then
stop_server ||:
# Clean the data so that there is no interference from the previous test run.
rm -rf "$FASTTEST_DATA"/{meta,}data ||:
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
start_server
echo "Going to run again: ${FAILED_TESTS[*]}"
clickhouse-test --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
else
echo "No failed tests"
fi
@ -329,7 +339,10 @@ case "$stage" in
;&
"run_tests")
run_tests
;&
;;
*)
echo "Unknown test stage '$stage'"
exit 1
esac
pstree -apgT


@ -37,7 +37,28 @@ RUN apt-get update \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN python3 -m pip install urllib3==1.23 pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout minio grpcio grpcio-tools cassandra-driver confluent-kafka avro
RUN python3 -m pip install \
PyMySQL \
aerospike \
avro \
cassandra-driver \
confluent-kafka \
dicttoxml \
docker \
docker-compose==1.22.0 \
grpcio \
grpcio-tools \
kafka-python \
kazoo \
minio \
protobuf \
psycopg2-binary==2.7.5 \
pymongo \
pytest \
pytest-timeout \
redis \
tzlocal \
urllib3
ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 17.09.1-ce


@ -48,12 +48,13 @@ This table shows queries that take significantly longer to process on the client
#### Unexpected Query Duration
Action required for every item -- these are errors that must be fixed.
Queries that have "short" duration (on the order of 0.1 s) can't be reliably tested in the normal way, where we perform a small number (about ten) of measurements for each server, because the signal-to-noise ratio is much smaller. There is a special mode for such queries that instead runs them for a fixed amount of time, normally with a much higher number of measurements (up to thousands). This mode must be explicitly enabled by the test author to avoid accidental errors. It must be used only for queries that are meant to complete "immediately", such as `select count(*)`. If your query is not supposed to be "immediate", try to make it run longer, e.g. by processing more data.
A query is supposed to run longer than 0.1 second. If your query runs faster, increase the amount of processed data to bring the run time above this threshold. You can use a bigger table (e.g. `hits_100m` instead of `hits_10m`), increase a `LIMIT`, make a query single-threaded, and so on. Queries that are too fast suffer from poor stability and precision.
This table shows queries for which the "short" marking is not consistent with the actual query run time -- i.e., a query runs for a long time but is marked as short, or it runs very fast but is not marked as short.
Sometimes you want to test a query that is supposed to complete "instantaneously", i.e. in sublinear time. This might be `count(*)`, or parsing a complicated tuple. It might not be practical or even possible to increase the run time of such queries by adding more data. For such queries there is a special comparison mode which runs them for a fixed amount of time, instead of a fixed number of iterations like we do normally. This mode is inferior to the normal mode, because the influence of noise and overhead is higher, which leads to less precise and less stable results.
If your query is really supposed to complete "immediately" and can't be made to run longer, you have to mark it as "short". To do so, write `<query short="1">...` in the test file. The value of "short" attribute is evaluated as a python expression, and substitutions are performed, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>`, to mark only a particular combination of variables as short.
If it is impossible to increase the run time of a query and it is supposed to complete "immediately", you have to explicitly mark this in the test. To do so, add a `short` attribute to the query tag in the test file: `<query short="1">...`. The value of the `short` attribute is evaluated as a python expression, and substitutions are performed, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>`, to mark only a particular combination of variables as short.
This table shows queries for which the `short` marking is not consistent with the actual query run time -- i.e., a query runs for a normal time but is marked as `short`, or it runs faster than normal but is not marked as `short`.
#### Partial Queries
Action required for the cells marked in red.


@ -469,12 +469,12 @@ if args.report == 'main':
columns = [
'Test', #0
'Wall clock time,&nbsp;s', #1
'Total client time,&nbsp;s', #2
'Total queries', #3
'Longest query<br>(sum for all runs),&nbsp;s', #4
'Avg wall clock time<br>(sum for all runs),&nbsp;s', #5
'Shortest query<br>(sum for all runs),&nbsp;s', #6
'Wall clock time, entire test,&nbsp;s', #1
'Total client time for measured query runs,&nbsp;s', #2
'Queries', #3
'Longest query, total for measured runs,&nbsp;s', #4
'Wall clock time per query,&nbsp;s', #5
'Shortest query, total for measured runs,&nbsp;s', #6
'', # Runs #7
]
attrs = ['' for c in columns]


@ -48,4 +48,8 @@ if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test ; then
SKIP_LIST_OPT="--use-skip-list"
fi
clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
# We can have several additional options, so we pass them as an array
# because it's more idiomatic.
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt


@ -105,7 +105,11 @@ if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
# We can have several additional options, so we pass them as an array
# because it's more idiomatic.
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
kill_clickhouse


@ -33,5 +33,8 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV NUM_TRIES=1
ENV MAX_RUN_TIME=0
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]


@ -1,6 +1,7 @@
#!/bin/bash
set -e -x
# fail on errors, verbose and export all env variables
set -e -x -a
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
@ -16,5 +17,17 @@ service clickhouse-server start && sleep 5
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi
# We can have several additional options, so we pass them as an array
# because it's more idiomatic.
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
function run_tests()
{
for i in $(seq 1 $NUM_TRIES); do
clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt
done
}
export -f run_tests
timeout $MAX_RUN_TIME bash -c run_tests ||:


@ -51,7 +51,11 @@ if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
# We can have several additional options, so we pass them as an array
# because it's more idiomatic.
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
kill_clickhouse


@ -45,7 +45,7 @@ function start()
# for clickhouse-server (via service)
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
# for clickhouse-client
export ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
start


@ -28,8 +28,18 @@ def get_options(i):
options = ""
if 0 < i:
options += " --order=random"
if i % 2 == 1:
options += " --db-engine=Ordinary"
# If database name is not specified, new database is created for each functional test.
# Run some threads with one database for all tests.
if i % 3 == 1:
options += " --database=test_{}".format(i)
if i == 13:
options += " --client-option='memory_tracker_fault_probability=0.00001'"
return options


@ -45,7 +45,7 @@ A `Block` is a container that represents a subset (chunk) of a table in memory.
When we calculate some function over columns in a block, we add another column with its result to the block, and we don't touch columns for arguments of the function because operations are immutable. Later, unneeded columns can be removed from the block, but not modified. It is convenient for the elimination of common subexpressions.
Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks, and only column data changes. It is better to split block data from the block header because small block sizes have a high overhead of temporary strings for copying shared\_ptrs and column names.
Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks, and only column data changes. It is better to split block data from the block header because small block sizes have a high overhead of temporary strings for copying shared_ptrs and column names.
## Block Streams {#block-streams}


@ -7,7 +7,7 @@ toc_title: Build on Linux
Supported platforms:
- x86\_64
- x86_64
- AArch64
- Power9 (experimental)


@ -26,7 +26,7 @@ toc_title: Third-Party Libraries Used
| libpcg-random | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libpcg-random/LICENSE-APACHE.txt) |
| libressl | [OpenSSL License](https://github.com/ClickHouse-Extras/ssl/blob/master/COPYING) |
| librdkafka | [BSD 2-Clause License](https://github.com/edenhill/librdkafka/blob/363dcad5a23dc29381cc626620e68ae418b3af19/LICENSE) |
| libwidechar\_width | [CC0 1.0 Universal](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) |
| libwidechar_width | [CC0 1.0 Universal](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) |
| llvm | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/llvm/blob/163def217817c90fb982a6daf384744d8472b92b/llvm/LICENSE.TXT) |
| lz4 | [BSD 2-Clause License](https://github.com/lz4/lz4/blob/c10863b98e1503af90616ae99725ecd120265dfb/LICENSE) |
| mariadb-connector-c | [LGPL v2.1](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/3.1/COPYING.LIB) |


@ -40,7 +40,7 @@ In the command line terminal run:
git clone --recursive git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
Note: please, substitute *your\_github\_username* with what is appropriate!
Note: please, substitute *your_github_username* with what is appropriate!
This command will create a directory `ClickHouse` containing the working copy of the project.
@ -150,7 +150,7 @@ Now that you are ready to build ClickHouse we recommend you to create a separate
mkdir build
cd build
You can have several different directories (build\_release, build\_debug, etc.) for different types of build.
You can have several different directories (build_release, build_debug, etc.) for different types of build.
While inside the `build` directory, configure your build by running CMake. Before the first run, you need to define environment variables that specify compiler (version 9 gcc compiler in this example).


@ -354,7 +354,7 @@ In all other cases, use a name that describes the meaning.
bool info_successfully_loaded = false;
```
**9.** Names of `define`s and global constants use ALL\_CAPS with underscores.
**9.** Names of `define`s and global constants use ALL_CAPS with underscores.
``` cpp
#define MAX_SRC_TABLE_NAMES_TO_STORE 1000
@ -394,7 +394,7 @@ The underscore suffix can be omitted if the argument is not used in the construc
timer (not m_timer)
```
**14.** For the constants in an `enum`, use CamelCase with a capital letter. ALL\_CAPS is also acceptable. If the `enum` is non-local, use an `enum class`.
**14.** For the constants in an `enum`, use CamelCase with a capital letter. ALL_CAPS is also acceptable. If the `enum` is non-local, use an `enum class`.
``` cpp
enum class CompressionMethod
@ -707,7 +707,7 @@ The standard library is used (`libc++`).
**4.** OS: Linux Ubuntu, not older than Precise.
**5.** Code is written for x86\_64 CPU architecture.
**5.** Code is written for x86_64 CPU architecture.
The CPU instruction set is the minimum supported set among our servers. Currently, it is SSE 4.2.


@ -220,7 +220,7 @@ Debug version of `jemalloc` is used for debug build.
ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries.
All the fuzz testing should be performed with sanitizers (Address and Undefined).
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “\_fuzzer” name postfixes.
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “_fuzzer” name postfixes.
Fuzzer example can be found at `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`.
We encourage you to write fuzz tests for every functionality that handles user input.


@ -71,12 +71,12 @@ Constructions with `{}` are similar to the [remote](../../../sql-reference/table
1. Suppose we have several files in TSV format with the following URIs on HDFS:
- hdfs://hdfs1:9000/some\_dir/some\_file\_1
- hdfs://hdfs1:9000/some\_dir/some\_file\_2
- hdfs://hdfs1:9000/some\_dir/some\_file\_3
- hdfs://hdfs1:9000/another\_dir/some\_file\_1
- hdfs://hdfs1:9000/another\_dir/some\_file\_2
- hdfs://hdfs1:9000/another\_dir/some\_file\_3
- hdfs://hdfs1:9000/some_dir/some_file_1
- hdfs://hdfs1:9000/some_dir/some_file_2
- hdfs://hdfs1:9000/some_dir/some_file_3
- hdfs://hdfs1:9000/another_dir/some_file_1
- hdfs://hdfs1:9000/another_dir/some_file_2
- hdfs://hdfs1:9000/another_dir/some_file_3
1. There are several ways to make a table consisting of all six files:
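For instance, one such way is a single table that covers both directories with a glob pattern. This is a sketch, not the page's elided example; the column set (`name String, value UInt32`) is assumed for illustration:

``` sql
CREATE TABLE hdfs_table (name String, value UInt32)
    ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV')
```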


@ -134,7 +134,7 @@ Example:
SELECT level, sum(total) FROM daily GROUP BY level;
```
To improve performance, received messages are grouped into blocks the size of [max\_insert\_block\_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasn't formed within [stream\_flush\_interval\_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasn't formed within [stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
To stop receiving topic data or to change the conversion logic, detach the materialized view:
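The example that follows in the full page is elided by this hunk; a minimal sketch, assuming the materialized view is named `consumer` (a hypothetical name used only for illustration):

``` sql
DETACH TABLE consumer;
ATTACH TABLE consumer;
```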
@ -192,6 +192,6 @@ Example:
**See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->


@ -52,9 +52,9 @@ Optional parameters:
- `rabbitmq_schema` Parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `rabbitmq_num_consumers` The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient.
- `rabbitmq_num_queues` The number of queues per consumer. Default: `1`. Specify more queues if the capacity of one queue per consumer is insufficient.
- `rabbitmq_queue_base` - Specify a base name for queues that will be declared. By default, queues are declared unique to tables based on db and table names.
- `rabbitmq_queue_base` - Specify a hint for queue names. Use cases of this setting are described below.
- `rabbitmq_deadletter_exchange` - Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). You can create another table with this exchange name and collect messages in cases when they are republished to dead letter exchange. By default dead letter exchange is not specified.
- `persistent` - If set to 1 (true), in insert query delivery mode will be set to 2 (marks messages as 'persistent'). Default: `0`.
- `rabbitmq_persistent` - If set to 1 (true), in insert query delivery mode will be set to 2 (marks messages as 'persistent'). Default: `0`.
- `rabbitmq_skip_broken_messages` RabbitMQ message parser tolerance to schema-incompatible messages per block. Default: `0`. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data).
- `rabbitmq_max_block_size`
- `rabbitmq_flush_interval_ms`
@ -102,14 +102,14 @@ Exchange type options:
- `fanout` - Routing to all tables (where exchange name is the same) regardless of the keys.
- `topic` - Routing is based on patterns with dot-separated keys. Examples: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`.
- `headers` - Routing is based on `key=value` matches with a setting `x-match=all` or `x-match=any`. Example table key list: `x-match=all,format=logs,type=report,year=2020`.
- `consistent-hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.
- `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.
Setting `rabbitmq_queue_base` may be used for the following cases:
- to let different tables share queues, so that multiple consumers could be registered for the same queues, which makes a better performance. If using `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings, the exact match of queues is achieved in case these parameters are the same.
- to be able to restore reading from certain durable queues when not all messages were successfully consumed. To be able to resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To be able to resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables. Note: this makes sense only if messages are sent with delivery mode 2 - marked 'persistent', durable.
- to reuse queues as they are declared durable and not auto-deleted.
- to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables.
- to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.)
To improve performance, received messages are grouped into blocks the size of [max\_insert\_block\_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasn't formed within [stream\_flush\_interval\_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasn't formed within [stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
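As a rough sketch of how these engine settings combine (the table, host, and exchange values below are illustrative, not taken from this page):

``` sql
CREATE TABLE rabbitmq_queue (key UInt64, value UInt64)
    ENGINE = RabbitMQ
    SETTINGS rabbitmq_host_port = 'localhost:5672',
             rabbitmq_exchange_name = 'exchange1',
             rabbitmq_exchange_type = 'fanout',
             rabbitmq_num_consumers = 2,
             rabbitmq_format = 'JSONEachRow'
```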
If `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings are specified along with `rabbitmq_exchange_type`, then:


@ -114,7 +114,7 @@ drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 12:09 201902_4_6_1
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached
```
The folders 201901\_1\_1\_0, 201901\_1\_7\_1 and so on are the directories of the parts. Each part relates to a corresponding partition and contains data just for a certain month (the table in this example has partitioning by month).
The folders 201901_1_1_0, 201901_1_7_1 and so on are the directories of the parts. Each part relates to a corresponding partition and contains data just for a certain month (the table in this example has partitioning by month).
The `detached` directory contains parts that were detached from the table using the [DETACH](../../../sql-reference/statements/alter/partition.md#alter_detach-partition) query. The corrupted parts are also moved to this directory, instead of being deleted. The server does not use the parts from the `detached` directory. You can add, delete, or modify the data in this directory at any time the server will not know about this until you run the [ATTACH](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query.
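One way to see how these directory names map to parts and partitions is to query `system.parts`; a minimal sketch (the table name is illustrative):

``` sql
SELECT partition, name, active
FROM system.parts
WHERE table = 'visits'
```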


@ -79,7 +79,7 @@ All of the parameters excepting `config_section` have the same meaning as in `Me
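As a sketch of how `config_section` is referenced when creating a table (assuming the standard Graphite column set and a rollup section named `graphite_rollup`; both are illustrative here):

``` sql
CREATE TABLE graphite_data
(
    Path String,
    Time DateTime,
    Value Float64,
    Version UInt32
)
ENGINE = GraphiteMergeTree('graphite_rollup')
PARTITION BY toYYYYMM(Time)
ORDER BY (Path, Time)
```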
## Rollup Configuration {#rollup-configuration}
The settings for rollup are defined by the [graphite\_rollup](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) parameter in the server configuration. The name of the parameter could be any. You can create several configurations and use them for different tables.
The settings for rollup are defined by the [graphite_rollup](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) parameter in the server configuration. The name of the parameter could be any. You can create several configurations and use them for different tables.
Rollup configuration structure:


@ -205,7 +205,7 @@ The number of columns in the primary key is not explicitly limited. Depending on
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max\_insert\_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries.
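A minimal sketch of this insertion-order pattern (table and column names are illustrative):

``` sql
CREATE TABLE events (d Date, message String)
    ENGINE = MergeTree ORDER BY tuple();

INSERT INTO events SELECT d, message FROM source SETTINGS max_insert_threads = 1;

SELECT * FROM events SETTINGS max_threads = 1;
```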
@ -248,7 +248,7 @@ In the example below, the index can't be used.
SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
```
To check whether ClickHouse can use the index when running a query, use the settings [force\_index\_by\_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force\_primary\_key](../../../operations/settings/settings.md).
To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force_primary_key](../../../operations/settings/settings.md).
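For example, a sketch of such a check (column names follow the examples above; with these settings enabled, the query raises an exception instead of falling back to a full scan if the indexes cannot be used):

``` sql
SELECT count() FROM table
WHERE EventDate = toDate('2020-09-01') AND CounterID = 34
SETTINGS force_index_by_date = 1, force_primary_key = 1
```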
The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date.
@ -339,7 +339,7 @@ Conditions in the `WHERE` clause contains calls of the functions that operate wi
The `set` index can be used with all functions. Function subsets for other indexes are shown in the table below.
| Function (operator) / Index | primary key | minmax | ngrambf\_v1 | tokenbf\_v1 | bloom\_filter |
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
@ -505,7 +505,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be
- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
- Storage policy — Set of volumes and the rules for moving data between them.
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
The names given to the described entities can be found in the system tables, [system.storage_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
### Configuration {#table_engine-mergetree-multiple-volumes_configure}
@ -635,7 +635,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
The number of threads performing background moves of data parts can be changed by the [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
### Details {#details}
@ -654,7 +654,7 @@ In all these cases except for mutations and partition freezing, a part is stored
Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones.
In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file.
Data is never transferred from the last one and into the first one. One may use system tables [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
A user can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter/partition.md#alter_move-partition); all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. The user will get an error message if not enough free space is available or if any of the required conditions are not met.
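A hedged sketch of such manual moves (the table, partition, part, volume, and disk names are hypothetical):

``` sql
-- Move a whole partition to another volume of the table's storage policy.
ALTER TABLE hits MOVE PARTITION '2020-10-01' TO VOLUME 'external';

-- Move a single data part to a specific disk.
ALTER TABLE hits MOVE PART '20201001_1_1_0' TO DISK 'fast_ssd';
```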
View File
@ -57,19 +57,19 @@ You can specify any existing ZooKeeper cluster and the system will use a directo
If ZooKeeper isn't set in the config file, you can't create replicated tables, and any existing replicated tables will be read-only.
ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](../../../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](../../../operations/settings/settings.md#settings-fallback_to_stale_replicas_for_distributed_queries).
For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it doesn't create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred `INSERTs` per second. The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data.
For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn't proven necessary on the Yandex.Metrica cluster (approximately 300 servers).
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by the [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option.
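A minimal sketch of quorum writes (the table name is hypothetical):

``` sql
-- Require acknowledgement from two replicas before the INSERT succeeds.
SET insert_quorum = 2;
INSERT INTO replicated_events VALUES (1, 'payload');
```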
Each block of data is written atomically. The INSERT query is divided into blocks up to `max_insert_block_size = 1048576` rows. In other words, if the `INSERT` query has fewer than 1048576 rows, it is made atomically.
Data blocks are deduplicated. For multiple writes of the same data block (data blocks of the same size containing the same rows in the same order), the block is only written once. The reason for this is in case of network failures when the client application doesn't know if the data was written to the DB, so the `INSERT` query can simply be repeated. It doesn't matter which replica INSERTs were sent to with identical data. `INSERTs` are idempotent. Deduplication parameters are controlled by [merge_tree](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-merge_tree) server settings.
During replication, only the source data to insert is transferred over the network. Further data transformation (merging) is coordinated and performed on all the replicas in the same way. This minimizes network usage, which means that replication works well when replicas reside in different datacenters. (Note that duplicating data in different datacenters is the main goal of replication.)
@ -117,7 +117,9 @@ CREATE TABLE table_name
</details>
As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the [macros](../../../operations/server-configuration-parameters/settings/#macros) section of the configuration file.
Example:
``` xml
<macros>
@ -137,6 +139,9 @@ In this case, the path consists of the following parts:
`table_name` is the name of the node for the table in ZooKeeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it doesn't change after a RENAME query.
*HINT*: you could add a database name in front of `table_name` as well. E.g. `db_name.table_name`
The two built-in substitutions `{database}` and `{table}` can be used, they expand into the table name and the database name respectively (unless these macros are defined in the `macros` section). So the zookeeper path can be specified as `'/clickhouse/tables/{layer}-{shard}/{database}/{table}'`.
Be careful with table renames when using these built-in substitutions. The path in ZooKeeper cannot be changed, and when the table is renamed, the macros will expand into a different path; the table will refer to a path that does not exist in ZooKeeper and will go into read-only mode.
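For example, a sketch using the built-in substitutions (column names are illustrative):

``` sql
CREATE TABLE db_name.table_name (EventDate Date, UserID UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/{database}/{table}', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY UserID;
```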
The replica name identifies different replicas of the same table. You can use the server name for this, as in the example. The name only needs to be unique within each shard.
You can define the parameters explicitly instead of using substitutions. This might be convenient for testing and for configuring small clusters. However, you can't use distributed DDL queries (`ON CLUSTER`) in this case.
@ -217,6 +222,6 @@ If the data in ZooKeeper was lost or damaged, you can save data by moving it to
**See also**
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->
View File
@ -97,13 +97,13 @@ Cluster names must not contain dots.
The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
- `host` — The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn't start. If you change the DNS record, restart the server.
- `port` — The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http_port.
- `user` — Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `password` — The password for connecting to a remote server (not masked). Default value: empty string.
- `secure` — Use ssl for connection; usually you should also define `port` = 9440. The server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and have correct certificates.
- `compression` — Use data compression. Default value: true.
When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access); see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting.
If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times.
This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly.
@ -144,11 +144,11 @@ You should be concerned about the sharding scheme in the following cases:
- Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we've done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by the [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
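A minimal sketch (the distributed table name is hypothetical, and the underlying table is assumed to define a sampling key):

``` sql
-- Query each shard using two replicas in parallel.
SET max_parallel_replicas = 2;
SELECT count() FROM distributed_table;
```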
## Virtual Columns {#virtual-columns}
@ -160,6 +160,6 @@ When the `max_parallel_replicas` option is enabled, query processing is parallel
**See Also**
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) <!--hide-->
View File
@ -25,10 +25,10 @@ You may have multiple sections like this, for the number of tables being transmi
**file** — Path to the file with the table dump, or -, which refers to stdin.
Only a single table can be retrieved from stdin.
The following parameters are optional: **name** — Name of the table. If omitted, _data is used.
**format** — Data format in the file. If omitted, TabSeparated is used.
One of the following parameters is required: **types** — A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, …
**structure** — The table structure in the format `UserID UInt64`, `URL String`. Defines the column names and types.
The files specified in file will be parsed by the format specified in format, using the data types specified in types or structure. The table will be uploaded to the server and accessible there as a temporary table with the name in name.
View File
@ -93,15 +93,15 @@ You cannot perform a `SELECT` query directly from the table. Instead, use one of
When creating a table, the following settings are applied:
- [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls)
- [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join)
- [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join)
- [join_overflow_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode)
- [join_any_take_last_row](../../../operations/settings/settings.md#settings-join_any_take_last_row)
The `Join`-engine tables can't be used in `GLOBAL JOIN` operations.
The `Join` engine allows you to specify the [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls) setting in the `CREATE TABLE` statement, and the [SELECT](../../../sql-reference/statements/select/index.md) query supports `join_use_nulls` as well. If the `join_use_nulls` settings differ, you can get an error when joining the table, depending on the kind of JOIN. When you use the [joinGet](../../../sql-reference/functions/other-functions.md#joinget) function, you have to use the same `join_use_nulls` setting in the `CREATE TABLE` and `SELECT` statements.
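A short sketch of keeping the setting consistent between creation and reads (the table name is hypothetical):

``` sql
CREATE TABLE id_val (id UInt32, val UInt8)
ENGINE = Join(ANY, LEFT, id)
SETTINGS join_use_nulls = 1;

-- Use the same join_use_nulls value when reading via joinGet.
SET join_use_nulls = 1;
SELECT joinGet('id_val', 'val', toUInt32(1));
```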
## Data Storage {#data-storage}
View File
@ -33,7 +33,7 @@ The typical way to use the `Merge` engine is for working with a large number of
Example 2:
Let's say you have an old table (WatchLog_old) and decided to change partitioning without moving data to a new table (WatchLog_new), and you need to see data from both tables.
``` sql
CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64)
View File
@ -23,7 +23,7 @@ additional headers for getting a response from the server.
respectively. For processing `POST` requests, the remote server must support
[Chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding).
You can limit the maximum number of HTTP GET redirect hops using the [max_http_get_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.
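A hedged sketch with the `URL` engine (the endpoint is a placeholder):

``` sql
-- Allow up to two redirect hops when reading from the remote endpoint.
SET max_http_get_redirects = 2;
CREATE TABLE url_table (word String, value UInt64)
ENGINE = URL('http://127.0.0.1:12345/data.csv', CSV);
SELECT * FROM url_table;
```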
## Example {#example}
View File
@ -40,7 +40,7 @@ Question candidates:
- How to implement pivot (like in pandas)?
- How to remove the default ClickHouse user through users.d?
- Importing MySQL dump to ClickHouse
- Window function workarounds (row_number, lag/lead, running diff/sum/average)
##}
{## [Original article](https://clickhouse.tech/docs/en/faq) ##}
View File
@ -6,7 +6,7 @@ toc_priority: 20
# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings}
If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
**Example**
View File
@ -7,7 +7,7 @@ toc_title: AMPLab Big Data Benchmark
See https://amplab.cs.berkeley.edu/benchmark/
Sign up for a free account at https://aws.amazon.com. It requires a credit card, email, and phone number. Get a new access key at https://console.aws.amazon.com/iam/home?nc2=h_m_sc#security_credential
Run the following in the console:
View File
@ -195,7 +195,7 @@ The data in this table uses 142 GB.
(Importing data directly from Postgres is also possible using `COPY ... TO PROGRAM`.)
Unfortunately, all the fields associated with the weather (precipitation…average_wind_speed) were filled with NULL. Because of this, we will remove them from the final data set.
To start, we'll create a table on a single server. Later we will make the table distributed.
View File
@ -7,9 +7,9 @@ toc_title: Installation
## System Requirements {#system-requirements}
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
Official pre-built binaries are typically compiled for x86_64 and leverage the SSE 4.2 instruction set, so unless otherwise stated, usage of a CPU that supports it becomes an additional system requirement. Here's the command to check whether the current CPU supports SSE 4.2:
``` bash
$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
View File
@ -38,10 +38,10 @@ The queries are executed as a read-only user. It implies some limitations:
The following settings are also enforced:
- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time)
## Examples {#examples}
View File
@ -11,10 +11,10 @@ In a “normal” row-oriented DBMS, data is stored in this order:
| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime |
|-----|-------------|------------|--------------------|-----------|---------------------|
| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
| #N | … | … | … | … | … |
In other words, all the values related to a row are physically stored next to each other.
@ -22,7 +22,7 @@ Examples of a row-oriented DBMS are MySQL, Postgres, and MS SQL Server.
In a column-oriented DBMS, data is stored like this:
| Row: | #0 | #1 | #2 | #N |
|-------------|---------------------|---------------------|---------------------|-----|
| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
| JavaEnable: | 1 | 0 | 1 | … |
View File
@ -43,6 +43,7 @@ The supported formats are:
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
@ -55,6 +56,7 @@ The supported formats are:
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✗ |
| [LineAsString](#lineasstring) | ✔ | ✗ |
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.
@ -210,7 +212,7 @@ Setting `format_template_resultset` specifies the path to file, which contains a
- `min` is the row with minimum values in `format_template_row` format (when extremes are set to 1)
- `max` is the row with maximum values in `format_template_row` format (when extremes are set to 1)
- `rows` is the total number of output rows
- `rows_before_limit` is the minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT. If the query contains GROUP BY, rows_before_limit_at_least is the exact number of rows there would have been without a LIMIT.
- `time` is the request execution time in seconds
- `rows_read` is the number of rows that have been read
- `bytes_read` is the number of bytes (uncompressed) that have been read
@ -361,21 +363,21 @@ Parsing allows the presence of the additional field `tskv` without the equal sig
Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)).
When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost).
``` bash
$ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv
```
\*By default, the delimiter is `,`. See the [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) setting for more information.
When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR) types are all supported.
Empty unquoted input values are replaced with default values for the respective columns, if
[input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields)
is enabled.
`NULL` is formatted as `\N` or `NULL` or an empty unquoted string (see settings [input_format_csv_unquoted_null_literal_as_null](../operations/settings/settings.md#settings-input_format_csv_unquoted_null_literal_as_null) and [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields)).
The CSV format supports the output of totals and extremes the same way as `TabSeparated`.
@ -439,12 +441,12 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA
}
```
The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t`, as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double-quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) to 0.
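For instance, a minimal sketch:

``` sql
-- With quoting disabled, 64-bit integers are emitted as bare JSON numbers.
SET output_format_json_quote_64bit_integers = 0;
SELECT toUInt64(1) AS x FORMAT JSON;
```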
`rows` — The total number of output rows.
`rows_before_limit_at_least` — The minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT.
If the query contains GROUP BY, rows_before_limit_at_least is the exact number of rows there would have been without a LIMIT.
`totals` — Total values (when using WITH TOTALS).
@ -452,7 +454,7 @@ If the query contains GROUP BY, rows\_before\_limit\_at\_least is the exact numb
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](../operations/settings/settings.md#settings-output_format_json_quote_denormals) to 1.
See also the [JSONEachRow](#jsoneachrow) format.
@ -633,7 +635,7 @@ ClickHouse ignores spaces between elements and commas after the objects. You can
ClickHouse substitutes omitted values with the default values for the corresponding [data types](../sql-reference/data-types/index.md).
If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting.
Consider the following table:
@ -676,7 +678,7 @@ Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 seque
### Usage of Nested Structures {#jsoneachrow-nested}
If you have a table with [Nested](../sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting.
For example, consider the following table:
@ -690,7 +692,7 @@ As you can see in the `Nested` data type description, ClickHouse treats each com
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]}
```
To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](../operations/settings/settings.md#settings-input_format_import_nested_json).
``` json
{
@ -872,7 +874,7 @@ The minimum set of characters that you need to escape when passing data in Value
This is the format that is used in `INSERT INTO t VALUES ...`, but you can also use it for formatting query results.
See also: [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) and [input_format_values_deduce_templates_of_expressions](../operations/settings/settings.md#settings-input_format_values_deduce_templates_of_expressions) settings.
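A small sketch of the expression-interpretation setting (the table `t` is hypothetical and must exist with matching columns):

``` sql
-- Allow expressions such as now() and 1 + 2 inside VALUES.
SET input_format_values_interpret_expressions = 1;
INSERT INTO t VALUES (now(), 1 + 2);
```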
## Vertical {#vertical}
@ -1075,6 +1077,10 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format
This means that before every message, its length should be written as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints).
See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages).
## ProtobufSingle {#protobufsingle}
Same as [Protobuf](#protobuf) but for storing/parsing a single Protobuf message without length delimiters.
## Avro {#data-format-avro}
[Apache Avro](https://avro.apache.org/) is a row-oriented data serialization framework developed within Apache's Hadoop project.
@ -1135,7 +1141,7 @@ Column names must:
- start with `[A-Za-z_]`
- subsequently contain only `[A-Za-z0-9_]`
Output Avro file compression and sync interval can be configured with [output_format_avro_codec](../operations/settings/settings.md#settings-output_format_avro_codec) and [output_format_avro_sync_interval](../operations/settings/settings.md#settings-output_format_avro_sync_interval) respectively.
## AvroConfluent {#data-format-avro-confluent}
@ -1145,7 +1151,7 @@ Each Avro message embeds a schema id that can be resolved to the actual schema w
Schemas are cached once resolved.
Schema Registry URL is configured with [format_avro_schema_registry_url](../operations/settings/settings.md#format_avro_schema_registry_url).
### Data Types Matching {#data_types-matching-1}
@ -1298,15 +1304,38 @@ can contain an absolute path or a path relative to the current directory on the
If you use the client in the [batch mode](../interfaces/cli.md#cli_usage), the path to the schema must be relative due to security reasons.
If you input or output data via the [HTTP interface](../interfaces/http.md) the file name specified in the format schema
should be located in the directory specified in [format_schema_path](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-format_schema_path)
in the server configuration.
## Skipping Errors {#skippingerrors}
Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip a broken row if a parsing error occurs and continue parsing from the beginning of the next row. See the [input_format_allow_errors_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) and
[input_format_allow_errors_ratio](../operations/settings/settings.md#settings-input_format_allow_errors_ratio) settings.
Limitations:
- In case of a parsing error, `JSONEachRow` skips all data until the new line (or EOF), so rows must be delimited by `\n` to count errors correctly.
- `Template` and `CustomSeparated` use the delimiter after the last column and the delimiter between rows to find the beginning of the next row, so skipping errors works only if at least one of them is not empty.
## LineAsString {#lineasstring}
In this format, a sequence of string objects separated by a newline character is interpreted as a single value. This format can only be parsed for a table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted.
**Example**
Query:
``` sql
DROP TABLE IF EXISTS line_as_string;
CREATE TABLE line_as_string (field String) ENGINE = Memory;
INSERT INTO line_as_string FORMAT LineAsString "I love apple", "I love banana", "I love orange";
SELECT * FROM line_as_string;
```
Result:
``` text
┌─field─────────────────────────────────────────────┐
│ "I love apple", "I love banana", "I love orange"; │
└───────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/interfaces/formats/) <!--hide-->
View File
@ -9,7 +9,7 @@ The HTTP interface lets you use ClickHouse on any platform from any programming
By default, clickhouse-server listens for HTTP on port 8123 (this can be changed in the config).
If you make a GET / request without parameters, it returns the 200 response code and the string defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response) (default value: “Ok.”, with a line feed at the end).
``` bash
$ curl 'http://localhost:8123/'
@ -148,12 +148,12 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
For successful requests that don't return a data table, an empty response body is returned.
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
If you specified `compress=1` in the URL, the server compresses the data it sends you.
If you specified `decompress=1` in the URL, the server decompresses the same data that you pass in the `POST` method.
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, you must append `Accept-Encoding: compression_method`. ClickHouse supports `gzip`, `br`, and `deflate` [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens). To enable HTTP compression, you must use the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the data compression level in the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting for all the compression methods.
You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed.
@ -215,7 +215,7 @@ $ echo 'SELECT 1' | curl -H 'X-ClickHouse-User: user' -H 'X-ClickHouse-Key: pass
```
If the user name is not specified, the `default` name is used. If the password is not specified, the empty password is used.
You can also use the URL parameters to specify any settings for processing a single query or entire profiles of settings. Example: http://localhost:8123/?profile=web&max_rows_to_read=1000000000&query=SELECT+1
For more information, see the [Settings](../operations/settings/index.md) section.
@ -237,7 +237,7 @@ For information about other parameters, see the section “SET”.
Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to add the `session_id` GET parameter to the request. You can use any string as the session ID. By default, the session is terminated after 60 seconds of inactivity. To change this timeout, modify the `default_session_timeout` setting in the server configuration, or add the `session_timeout` GET parameter to the request. To check the session status, use the `session_check=1` parameter. Only one query at a time can be executed within a single session.
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
@ -254,9 +254,9 @@ Possible header fields:
- `written_bytes` — Volume of data written in bytes.
Running requests don't stop automatically if the HTTP connection is lost. Parsing and data formatting are performed on the server-side, and using the network might be ineffective.
The optional query_id parameter can be passed as the query ID (any string). For more information, see the section “Settings, replace_running_query”.
The optional quota_key parameter can be passed as the quota key (any string). For more information, see the section “Quotas”.
The HTTP interface allows passing external data (external temporary tables) for querying. For more information, see the section “External data for query processing”.
@ -377,7 +377,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2's regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
`type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).
- `query` — use with `predefined_query_handler` type, executes query when the handler is called.
@ -391,13 +391,13 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
Next are the configuration methods for different `type`.
### predefined_query_handler {#predefined_query_handler}
`predefined_query_handler` supports setting `Settings` and `query_params` values. You can configure `query` in the type of `predefined_query_handler`.
`query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a required configuration.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
Example:
@ -428,13 +428,13 @@ max_alter_threads 2
!!! note "caution"
A `predefined_query_handler` supports only one `query` of an insert type.
### dynamic_query_handler {#dynamic_query_handler}
In `dynamic_query_handler`, the query is written in the form of param of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`.
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and queries whether the settings were set successfully.
Example:
@ -459,7 +459,7 @@ max_alter_threads 2
### static {#static}
`static` can return [content_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and `response_content`. `response_content` can return the specified content.
Example:
View File
@ -5,7 +5,7 @@ toc_title: MySQL Interface
# MySQL Interface {#mysql-interface}
ClickHouse supports MySQL wire protocol. It can be enabled by [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting in configuration file:
``` xml
<mysql_port>9004</mysql_port>
View File
@ -9,7 +9,7 @@ toc_title: Client Libraries
Yandex does **not** maintain the libraries listed below and hasn't done any extensive testing to ensure their quality.
- Python
- [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm)
- [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
@ -46,7 +46,7 @@ toc_title: Client Libraries
- [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
- Kotlin
- [AORM](https://github.com/TanVD/AORM)
- C\#
- C#
- [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient)
- [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
- [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)

View File

@ -17,15 +17,15 @@ toc_title: Integrations
- [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader)
- [horgh-replicator](https://github.com/larsnovikov/horgh-replicator)
- [PostgreSQL](https://www.postgresql.org)
- [clickhousedb\_fdw](https://github.com/Percona-Lab/clickhousedb_fdw)
- [infi.clickhouse\_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (uses [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm))
- [clickhousedb_fdw](https://github.com/Percona-Lab/clickhousedb_fdw)
- [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm))
- [pg2ch](https://github.com/mkabilov/pg2ch)
- [clickhouse\_fdw](https://github.com/adjust/clickhouse_fdw)
- [clickhouse_fdw](https://github.com/adjust/clickhouse_fdw)
- [MSSQL](https://en.wikipedia.org/wiki/Microsoft_SQL_Server)
- [ClickHouseMigrator](https://github.com/zlzforever/ClickHouseMigrator)
- Message queues
- [Kafka](https://kafka.apache.org)
- [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/))
- [clickhouse_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/))
- [stream-loader-clickhouse](https://github.com/adform/stream-loader)
- Stream processing
- [Flink](https://flink.apache.org)
@ -49,12 +49,12 @@ toc_title: Integrations
- [Grafana](https://grafana.com/)
- [clickhouse-grafana](https://github.com/Vertamedia/clickhouse-grafana)
- [Prometheus](https://prometheus.io/)
- [clickhouse\_exporter](https://github.com/f1yegor/clickhouse_exporter)
- [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter)
- [PromHouse](https://github.com/Percona-Lab/PromHouse)
- [clickhouse\_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/))
- [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/))
- [Nagios](https://www.nagios.org/)
- [check\_clickhouse](https://github.com/exogroup/check_clickhouse/)
- [check\_clickhouse.py](https://github.com/innogames/igmonplugins/blob/master/src/check_clickhouse.py)
- [check_clickhouse](https://github.com/exogroup/check_clickhouse/)
- [check_clickhouse.py](https://github.com/innogames/igmonplugins/blob/master/src/check_clickhouse.py)
- [Zabbix](https://www.zabbix.com)
- [clickhouse-zabbix-template](https://github.com/Altinity/clickhouse-zabbix-template)
- [Sematext](https://sematext.com/)
@ -74,7 +74,7 @@ toc_title: Integrations
- Python
- [SQLAlchemy](https://www.sqlalchemy.org)
- [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm))
- [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm))
- [pandas](https://pandas.pydata.org)
- [pandahouse](https://github.com/kszucs/pandahouse)
- PHP
@ -89,7 +89,7 @@ toc_title: Integrations
- Scala
- [Akka](https://akka.io)
- [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
- C\#
- C#
- [ADO.NET](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/ado-net-overview)
- [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
- [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)
@ -97,7 +97,7 @@ toc_title: Integrations
- [ClickHouse.Net.Migrations](https://github.com/ilyabreev/ClickHouse.Net.Migrations)
- Elixir
- [Ecto](https://github.com/elixir-ecto/ecto)
- [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto)
- [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto)
- Ruby
- [Ruby on Rails](https://rubyonrails.org/)
- [activecube](https://github.com/bitquery/activecube)

View File

@ -60,6 +60,7 @@ toc_title: Adopters
| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
| <a href="https://mellodesign.ru/" class="favicon">Mello</a> | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) |
| <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a> | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |
| <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
@ -68,6 +69,7 @@ toc_title: Adopters
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
@ -86,6 +88,7 @@ toc_title: Adopters
| <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
| <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |

View File

@ -138,10 +138,10 @@ Management queries:
- Set up a directory for configuration storage.
ClickHouse stores access entity configurations in the folder set in the [access\_control\_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
ClickHouse stores access entity configurations in the folder set in the [access_control_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
- Enable SQL-driven access control and account management for at least one user account.
By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access\_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1.
By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1.
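    For example, a minimal sketch of such a `users.xml` fragment (the `admin` account name is a placeholder):

``` xml
<users>
    <!-- "admin" is a placeholder account name -->
    <admin>
        <access_management>1</access_management>
    </admin>
</users>
```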
[Original article](https://clickhouse.tech/docs/en/operations/access_rights/) <!--hide-->

View File

@ -20,7 +20,7 @@ Some settings specified in the main configuration file can be overridden in othe
## Substitution {#substitution}
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
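As a rough sketch (the `networks` and `macros` element names are illustrative), a server config can take one element's body from the substitutions file and another from ZooKeeper:

``` xml
<yandex>
    <!-- body taken from /yandex/networks in /etc/metrika.xml; optional="true" suppresses the log message if it is missing -->
    <networks incl="networks" optional="true" />
    <!-- subtree replaced with the contents of the ZooKeeper node -->
    <macros from_zk="/clickhouse/macros" />
</yandex>
```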

View File

@ -33,7 +33,7 @@ ClickHouse collects:
- Different metrics of how the server uses computational resources.
- Common statistics on query processing.
You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous\_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables.
You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables.
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html).
@ -41,4 +41,4 @@ You can configure ClickHouse to export metrics to [Prometheus](https://prometheu
Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`.
To monitor servers in a cluster configuration, you should set the [max\_replica\_delay\_for\_distributed\_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap.
To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap.

View File

@ -9,11 +9,11 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p
To use the profiler:
- Setup the [trace\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
- Set up the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
This section configures the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesnt clean up the table and all the stored virtual memory address may become invalid.
This section configures the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After a server restart, ClickHouse doesn't clean up the table, and all the stored virtual memory addresses may become invalid.
- Setup the [query\_profiler\_cpu\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
- Set up the [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
These settings allow you to configure profiler timers. As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query.
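A minimal sketch of both pieces, assuming the default `system.trace_log` table and an illustrative sampling period of one second (10^9 nanoseconds):

``` xml
<!-- config.xml: where the profiler stores its samples -->
<trace_log>
    <database>system</database>
    <table>trace_log</table>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log>

<!-- users.xml, inside a settings profile: sample both timers once per second -->
<query_profiler_real_time_period_ns>1000000000</query_profiler_real_time_period_ns>
<query_profiler_cpu_time_period_ns>1000000000</query_profiler_cpu_time_period_ns>
```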
@ -23,7 +23,7 @@ To analyze the `trace_log` system table:
- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages).
- Allow introspection functions by the [allow\_introspection\_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting.
- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting.
For security reasons, introspection functions are disabled by default.

View File

@ -7,7 +7,7 @@ toc_title: Requirements
## CPU {#cpu}
For installation from prebuilt deb packages, use a CPU with x86\_64 architecture and support for SSE 4.2 instructions. To run ClickHouse with processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
For installation from prebuilt deb packages, use a CPU with x86_64 architecture and support for SSE 4.2 instructions. To run ClickHouse with processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.

View File

@ -5,7 +5,7 @@ toc_title: Server Settings
# Server Settings {#server-settings}
## builtin\_dictionaries\_reload\_interval {#builtin-dictionaries-reload-interval}
## builtin_dictionaries_reload_interval {#builtin-dictionaries-reload-interval}
The interval in seconds before reloading built-in dictionaries.
@ -92,7 +92,7 @@ Configures soft limit for core dump file size, one gigabyte by default.
(Hard limit is configured via system tools)
## default\_database {#default-database}
## default_database {#default-database}
The default database.
@ -104,7 +104,7 @@ To get a list of databases, use the [SHOW DATABASES](../../sql-reference/stateme
<default_database>default</default_database>
```
## default\_profile {#default-profile}
## default_profile {#default-profile}
Default settings profile.
@ -116,7 +116,7 @@ Settings profiles are located in the file specified in the parameter `user_confi
<default_profile>default</default_profile>
```
## dictionaries\_config {#server_configuration_parameters-dictionaries_config}
## dictionaries_config {#server_configuration_parameters-dictionaries_config}
The path to the config file for external dictionaries.
@ -133,7 +133,7 @@ See also “[External dictionaries](../../sql-reference/dictionaries/external-di
<dictionaries_config>*_dictionary.xml</dictionaries_config>
```
## dictionaries\_lazy\_load {#server_configuration_parameters-dictionaries_lazy_load}
## dictionaries_lazy_load {#server_configuration_parameters-dictionaries_lazy_load}
Lazy loading of dictionaries.
@ -149,7 +149,7 @@ The default is `true`.
<dictionaries_lazy_load>true</dictionaries_lazy_load>
```
## format\_schema\_path {#server_configuration_parameters-format_schema_path}
## format_schema_path {#server_configuration_parameters-format_schema_path}
The path to the directory with the schemes for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format.
@ -170,11 +170,11 @@ Settings:
- port The port on the Graphite server.
- interval The interval for sending, in seconds.
- timeout The timeout for sending data, in seconds.
- root\_path Prefix for keys.
- root_path Prefix for keys.
- metrics Sending data from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- events Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- events\_cumulative Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- asynchronous\_metrics Sending data from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
- events_cumulative Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- asynchronous_metrics Sending data from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
You can configure multiple `<graphite>` clauses. For instance, you can use this for sending different data at different intervals.
@ -194,7 +194,7 @@ You can configure multiple `<graphite>` clauses. For instance, you can use this
</graphite>
```
## graphite\_rollup {#server_configuration_parameters-graphite-rollup}
## graphite_rollup {#server_configuration_parameters-graphite-rollup}
Settings for thinning data for Graphite.
@ -222,7 +222,7 @@ For more details, see [GraphiteMergeTree](../../engines/table-engines/mergetree-
</graphite_rollup_example>
```
## http\_port/https\_port {#http-porthttps-port}
## http_port/https_port {#http-porthttps-port}
The port for connecting to the server over HTTP(s).
@ -236,7 +236,7 @@ If `http_port` is specified, the OpenSSL configuration is ignored even if it is
<https_port>9999</https_port>
```
## http\_server\_default\_response {#server_configuration_parameters-http_server_default_response}
## http_server_default_response {#server_configuration_parameters-http_server_default_response}
The page that is shown by default when you access the ClickHouse HTTP(s) server.
The default value is “Ok.” (with a line feed at the end)
@ -251,7 +251,7 @@ Opens `https://tabix.io/` when accessing `http://localhost: http_port`.
</http_server_default_response>
```
## include\_from {#server_configuration_parameters-include_from}
## include_from {#server_configuration_parameters-include_from}
The path to the file with substitutions.
@ -263,7 +263,7 @@ For more information, see the section “[Configuration files](../../operations/
<include_from>/etc/metrica.xml</include_from>
```
## interserver\_http\_port {#interserver-http-port}
## interserver_http_port {#interserver-http-port}
Port for exchanging data between ClickHouse servers.
@ -273,7 +273,7 @@ Port for exchanging data between ClickHouse servers.
<interserver_http_port>9009</interserver_http_port>
```
## interserver\_http\_host {#interserver-http-host}
## interserver_http_host {#interserver-http-host}
The hostname that can be used by other servers to access this server.
@ -287,7 +287,7 @@ Useful for breaking away from a specific network interface.
<interserver_http_host>example.yandex.ru</interserver_http_host>
```
## interserver\_http\_credentials {#server-settings-interserver-http-credentials}
## interserver_http_credentials {#server-settings-interserver-http-credentials}
The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server checks these credentials for connecting replicas and uses the same credentials when connecting to other replicas, so these credentials should be set to the same value for all replicas in a cluster.
By default, the authentication is not used.
@ -306,7 +306,7 @@ This section contains the following parameters:
</interserver_http_credentials>
```
## keep\_alive\_timeout {#keep-alive-timeout}
## keep_alive_timeout {#keep-alive-timeout}
The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 3 seconds.
@ -316,7 +316,7 @@ The number of seconds that ClickHouse waits for incoming requests before closing
<keep_alive_timeout>3</keep_alive_timeout>
```
## listen\_host {#server_configuration_parameters-listen_host}
## listen_host {#server_configuration_parameters-listen_host}
Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
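For example, a sketch that accepts connections only from the local host:

``` xml
<listen_host>::1</listen_host>
<listen_host>127.0.0.1</listen_host>
```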
@ -367,25 +367,25 @@ Writing to the syslog is also supported. Config example:
Keys for syslog:
- use\_syslog — Required setting if you want to write to the syslog.
- use_syslog — Required setting if you want to write to the syslog.
- address — The host[:port] of syslogd. If omitted, the local daemon is used.
- hostname — Optional. The name of the host that logs are sent from.
- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the “LOG\_” prefix: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`, and so on).
- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the “LOG_” prefix: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`, and so on).
Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON` otherwise.
- format Message format. Possible values: `bsd` and `syslog`.
## send\_crash\_reports {#server_configuration_parameters-logger}
## send_crash_reports {#server_configuration_parameters-logger}
Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io).
Enabling it, especially in pre-production environments, is greatly appreciated.
Enabling it, especially in pre-production environments, is highly appreciated.
The server will need an access to public Internet via IPv4 (at the time of writing IPv6 is not supported by Sentry) for this feature to be functioning properly.
The server will need access to the public Internet via IPv4 (at the time of writing IPv6 is not supported by Sentry) for this feature to be functioning properly.
Keys:
- `enabled` Boolean flag to enable the feature, `false` by default. Set to `true` to allow sending crash reports.
- `endpoint` You can override the Sentry endpoint URL for sending crash reports. It can be either separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax.
- `anonymize` - Avoid attaching the server hostname to crash report.
- `endpoint` You can override the Sentry endpoint URL for sending crash reports. It can be either a separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax.
- `anonymize` - Avoid attaching the server hostname to the crash report.
- `http_proxy` - Configure HTTP proxy for sending crash reports.
- `debug` - Sets the Sentry client into debug mode.
- `tmp_path` - Filesystem path for temporary crash report state.
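A minimal sketch enabling the feature with an anonymized hostname; the DSN shown is a placeholder (omit `endpoint` to use the default):

``` xml
<send_crash_reports>
    <enabled>true</enabled>
    <anonymize>true</anonymize>
    <!-- placeholder DSN; replace with your own Sentry project, or omit -->
    <endpoint>https://examplePublicKey@o0.ingest.sentry.io/0</endpoint>
</send_crash_reports>
```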
@ -412,7 +412,7 @@ For more information, see the section “[Creating replicated tables](../../engi
<macros incl="macros" optional="true" />
```
## mark\_cache\_size {#server-mark-cache-size}
## mark_cache_size {#server-mark-cache-size}
Approximate size (in bytes) of the cache of marks used by table engines of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family.
@ -424,7 +424,7 @@ The cache is shared for the server and memory is allocated as needed. The cache
<mark_cache_size>5368709120</mark_cache_size>
```
## max\_server\_memory\_usage {#max_server_memory_usage}
## max_server_memory_usage {#max_server_memory_usage}
Limits total RAM usage by the ClickHouse server.
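For example, a sketch capping the server at 30 GiB (value in bytes, illustrative; `0` keeps the default, ratio-based limit):

``` xml
<max_server_memory_usage>32212254720</max_server_memory_usage>
```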
@ -441,7 +441,7 @@ The default `max_server_memory_usage` value is calculated as `memory_amount * ma
**See also**
- [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
- [max_server_memory_usage_to_ram_ratio](#max_server_memory_usage_to_ram_ratio)
## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio}
@ -469,7 +469,7 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa
- [max_server_memory_usage](#max_server_memory_usage)
## max\_concurrent\_queries {#max-concurrent-queries}
## max_concurrent_queries {#max-concurrent-queries}
The maximum number of simultaneously processed requests.
@ -479,7 +479,7 @@ The maximum number of simultaneously processed requests.
<max_concurrent_queries>100</max_concurrent_queries>
```
## max\_connections {#max-connections}
## max_connections {#max-connections}
The maximum number of inbound connections.
@ -489,7 +489,7 @@ The maximum number of inbound connections.
<max_connections>4096</max_connections>
```
## max\_open\_files {#max-open-files}
## max_open_files {#max-open-files}
The maximum number of open files.
@ -503,7 +503,7 @@ We recommend using this option in Mac OS X since the `getrlimit()` function retu
<max_open_files>262144</max_open_files>
```
## max\_table\_size\_to\_drop {#max-table-size-to-drop}
## max_table_size_to_drop {#max-table-size-to-drop}
Restriction on deleting tables.
@ -521,7 +521,7 @@ The value 0 means that you can delete all tables without any restrictions.
<max_table_size_to_drop>0</max_table_size_to_drop>
```
## max\_thread\_pool\_size {#max-thread-pool-size}
## max_thread_pool_size {#max-thread-pool-size}
The maximum number of threads in the Global Thread pool.
@ -533,7 +533,7 @@ Default value: 10000.
<max_thread_pool_size>12000</max_thread_pool_size>
```
## merge\_tree {#server_configuration_parameters-merge_tree}
## merge_tree {#server_configuration_parameters-merge_tree}
Fine tuning for tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
@ -547,7 +547,7 @@ For more information, see the MergeTreeSettings.h header file.
</merge_tree>
```
## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree}
Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
@ -584,7 +584,7 @@ Keys for server/client settings:
- sessionTimeout Time for caching the session on the server.
- extendedVerification Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`.
- requireTLSv1 Require a TLSv1 connection. Acceptable values: `true`, `false`.
- requireTLSv1\_1 Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
- requireTLSv1_1 Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
- requireTLSv1_2 Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
- fips Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS.
- privateKeyPassphraseHandler Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
@ -622,11 +622,11 @@ Keys for server/client settings:
</openSSL>
```
## part\_log {#server_configuration_parameters-part-log}
## part_log {#server_configuration_parameters-part-log}
Logging events that are associated with [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process.
Queries are logged in the [system.part\_log](../../operations/system-tables/part_log.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below).
Queries are logged in the [system.part_log](../../operations/system-tables/part_log.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below).
Use the following parameters to configure logging:
@ -670,7 +670,7 @@ Settings:
- `port` Port for `endpoint`.
- `metrics` Flag that enables exposing metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- `events` Flag that enables exposing metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` Flag that sets to expose current metrics values from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
- `asynchronous_metrics` Flag that enables exposing current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
**Example**
@ -684,11 +684,11 @@ Settings:
</prometheus>
```
## query\_log {#server_configuration_parameters-query-log}
## query_log {#server_configuration_parameters-query-log}
Setting for logging queries received with the [log\_queries=1](../../operations/settings/settings.md) setting.
Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting.
Queries are logged in the [system.query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Queries are logged in the [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Use the following parameters to configure logging:
@ -711,11 +711,11 @@ If the table doesnt exist, ClickHouse will create it. If the structure of the
</query_log>
```
## query\_thread\_log {#server_configuration_parameters-query_thread_log}
## query_thread_log {#server_configuration_parameters-query_thread_log}
Setting for logging threads of queries received with the [log\_query\_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting.
Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting.
Queries are logged in the [system.query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Queries are logged in the [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Use the following parameters to configure logging:
@ -738,9 +738,9 @@ If the table doesnt exist, ClickHouse will create it. If the structure of the
</query_thread_log>
```
## text\_log {#server_configuration_parameters-text_log}
## text_log {#server_configuration_parameters-text_log}
Settings for the [text\_log](../../operations/system-tables/text_log.md#system_tables-text_log) system table for logging text messages.
Settings for the [text_log](../../operations/system-tables/text_log.md#system_tables-text_log) system table for logging text messages.
Parameters:
@ -766,9 +766,9 @@ Parameters:
```
## trace\_log {#server_configuration_parameters-trace_log}
## trace_log {#server_configuration_parameters-trace_log}
Settings for the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
Parameters:
@ -789,7 +789,7 @@ The default server configuration file `config.xml` contains the following settin
</trace_log>
```
## query\_masking\_rules {#query-masking-rules}
## query_masking_rules {#query-masking-rules}
Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs,
`system.query_log`, `system.text_log`, `system.processes` tables, and in logs sent to the client. That allows preventing
@ -820,7 +820,7 @@ The masking rules are applied to the whole query (to prevent leaks of sensitive
For distributed queries, each server has to be configured separately; otherwise, subqueries passed to other
nodes will be stored without masking.
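As a sketch, one rule that masks anything shaped like a US Social Security number (the rule name and regexp are illustrative):

``` xml
<query_masking_rules>
    <rule>
        <name>hide SSN</name>
        <regexp>\b\d{3}-\d{2}-\d{4}\b</regexp>
        <replace>***</replace>
    </rule>
</query_masking_rules>
```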
## remote\_servers {#server-settings-remote-servers}
## remote_servers {#server-settings-remote-servers}
Configuration of clusters used by the [Distributed](../../engines/table-engines/special/distributed.md) table engine and by the `cluster` table function.
@ -834,7 +834,7 @@ For the value of the `incl` attribute, see the section “[Configuration files](
**See Also**
- [skip\_unavailable\_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
## timezone {#server_configuration_parameters-timezone}
@ -850,7 +850,7 @@ The time zone is necessary for conversions between String and DateTime formats w
<timezone>Europe/Moscow</timezone>
```
## tcp\_port {#server_configuration_parameters-tcp_port}
## tcp_port {#server_configuration_parameters-tcp_port}
Port for communicating with clients over the TCP protocol.
@ -860,7 +860,7 @@ Port for communicating with clients over the TCP protocol.
<tcp_port>9000</tcp_port>
```
## tcp\_port\_secure {#server_configuration_parameters-tcp_port_secure}
## tcp_port_secure {#server_configuration_parameters-tcp_port_secure}
TCP port for secure communication with clients. Use it with [OpenSSL](#server_configuration_parameters-openssl) settings.
@ -874,7 +874,7 @@ Positive integer.
<tcp_port_secure>9440</tcp_port_secure>
```
## mysql\_port {#server_configuration_parameters-mysql_port}
## mysql_port {#server_configuration_parameters-mysql_port}
Port for communicating with clients over the MySQL protocol.
@ -888,7 +888,7 @@ Example
<mysql_port>9004</mysql_port>
```
## tmp\_path {#tmp-path}
## tmp_path {#tmp-path}
Path to temporary data for processing large queries.
@ -901,11 +901,11 @@ Path to temporary data for processing large queries.
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
```
## tmp\_policy {#tmp-policy}
## tmp_policy {#tmp-policy}
Policy from [storage\_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
Policy from [storage_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
If not set, [tmp\_path](#tmp-path) is used, otherwise it is ignored.
If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
!!! note "Note"
- `move_factor` is ignored.
@ -913,11 +913,11 @@ If not set, [tmp\_path](#tmp-path) is used, otherwise it is ignored.
- `max_data_part_size_bytes` is ignored.
- You must have exactly one volume in that policy.
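For example, a sketch assuming a policy named `tmp_on_fast_disk` is defined in `storage_configuration`:

``` xml
<tmp_policy>tmp_on_fast_disk</tmp_policy>
```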
## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size}
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use\_uncompressed\_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) is enabled.
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) is enabled.
The uncompressed cache is advantageous for very short queries in individual cases.
@ -927,7 +927,7 @@ The uncompressed cache is advantageous for very short queries in individual case
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
```
## user\_files\_path {#server_configuration_parameters-user_files_path}
## user_files_path {#server_configuration_parameters-user_files_path}
The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md).
@ -937,7 +937,7 @@ The directory with user files. Used in the table function [file()](../../sql-ref
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
```
## users\_config {#users-config}
## users_config {#users-config}
Path to the file that contains:
@ -1005,13 +1005,13 @@ This section contains the following parameters:
- [Replication](../../engines/table-engines/mergetree-family/replication.md)
- [ZooKeeper Programmer's Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html)
## use\_minimalistic\_part\_header\_in\_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}
## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}
Storage method for data part headers in ZooKeeper.
This setting only applies to the `MergeTree` family. It can be specified:
- Globally in the [merge\_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file.
- Globally in the [merge_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file.
ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behaviour when the setting changes.
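    A sketch of the global form:

``` xml
<merge_tree>
    <use_minimalistic_part_header_in_zookeeper>1</use_minimalistic_part_header_in_zookeeper>
</merge_tree>
```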
@ -1033,14 +1033,14 @@ If `use_minimalistic_part_header_in_zookeeper = 1`, then [replicated](../../engi
**Default value:** 0.
## disable\_internal\_dns\_cache {#server-settings-disable-internal-dns-cache}
## disable_internal_dns_cache {#server-settings-disable-internal-dns-cache}
Disables the internal DNS cache. Recommended for operating ClickHouse in systems
with frequently changing infrastructure such as Kubernetes.
**Default value:** 0.
## dns\_cache\_update\_period {#server-settings-dns-cache-update-period}
## dns_cache_update_period {#server-settings-dns-cache-update-period}
The period of updating IP addresses stored in the ClickHouse internal DNS cache (in seconds).
The update is performed asynchronously, in a separate system thread.
@ -1049,9 +1049,9 @@ The update is performed asynchronously, in a separate system thread.
**See also**
- [background\_schedule\_pool\_size](../../operations/settings/settings.md#background_schedule_pool_size)
- [background_schedule_pool_size](../../operations/settings/settings.md#background_schedule_pool_size)
## access\_control\_path {#access_control_path}
## access_control_path {#access_control_path}
Path to a folder where a ClickHouse server stores user and role configurations created by SQL commands.

View File

@ -16,7 +16,7 @@ Queries in ClickHouse can be divided into several types:
The following settings regulate user permissions by the type of query:
- [readonly](#settings_readonly) — Restricts permissions for all types of queries except DDL queries.
- [allow\_ddl](#settings_allow_ddl) — Restricts permissions for DDL queries.
- [allow_ddl](#settings_allow_ddl) — Restricts permissions for DDL queries.
`KILL QUERY` can be performed with any settings.
@ -41,7 +41,7 @@ from changing only specific settings, for details see [constraints on settings](
Default value: 0
## allow\_ddl {#settings_allow_ddl}
## allow_ddl {#settings_allow_ddl}
Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries.
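As a sketch, a settings profile for a hypothetical read-only analyst (`reporting` is a placeholder profile name) that allows reads but denies DDL:

``` xml
<profiles>
    <reporting>
        <readonly>1</readonly>
        <allow_ddl>0</allow_ddl>
    </reporting>
</profiles>
```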

View File

@ -12,8 +12,8 @@ Almost all the restrictions only apply to `SELECT`. For distributed query proces
ClickHouse checks the restrictions for data parts, not for each row. This means that a restriction can be exceeded by up to the size of a data part.
Restrictions on the “maximum amount of something” can take the value 0, which means “unrestricted”.
Most restrictions also have an overflow\_mode setting, meaning what to do when the limit is exceeded.
It can take one of two values: `throw` or `break`. Restrictions on aggregation (group\_by\_overflow\_mode) also have the value `any`.
Most restrictions also have an overflow_mode setting, which defines what to do when the limit is exceeded.
It can take one of two values: `throw` or `break`. Restrictions on aggregation (group_by_overflow_mode) also have the value `any`.
`throw` Throw an exception (default).
@ -21,7 +21,7 @@ It can take one of two values: `throw` or `break`. Restrictions on aggregation (
`any (only for group_by_overflow_mode)` Continue aggregation for the keys that got into the set, but don't add new keys to the set.
## max\_memory\_usage {#settings_max_memory_usage}
## max_memory_usage {#settings_max_memory_usage}
The maximum amount of RAM to use for running a query on a single server.
@ -36,31 +36,31 @@ Memory usage is not monitored for the states of certain aggregate functions.
Memory usage is not fully tracked for states of the aggregate functions `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` from `String` and `Array` arguments.
Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and [max\_server\_memory\_usage](../../operations/server-configuration-parameters/settings.md#max_server_memory_usage).
Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and [max_server_memory_usage](../../operations/server-configuration-parameters/settings.md#max_server_memory_usage).
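For example, a sketch of a settings-profile entry limiting every query in that profile to 10 GB (illustrative value, in bytes):

``` xml
<max_memory_usage>10000000000</max_memory_usage>
```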
## max\_memory\_usage\_for\_user {#max-memory-usage-for-user}
## max_memory_usage_for_user {#max-memory-usage-for-user}
The maximum amount of RAM to use for running a user's queries on a single server.
Default values are defined in [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Settings.h#L288). By default, the amount is not restricted (`max_memory_usage_for_user = 0`).
See also the description of [max\_memory\_usage](#settings_max_memory_usage).
See also the description of [max_memory_usage](#settings_max_memory_usage).
## max\_rows\_to\_read {#max-rows-to-read}
## max_rows_to_read {#max-rows-to-read}
The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be exceeded slightly.
A maximum number of rows that can be read from a table when running a query.
## max\_bytes\_to\_read {#max-bytes-to-read}
## max_bytes_to_read {#max-bytes-to-read}
A maximum number of bytes (uncompressed data) that can be read from a table when running a query.
## read\_overflow\_mode {#read-overflow-mode}
## read_overflow_mode {#read-overflow-mode}
What to do when the volume of data read exceeds one of the limits: throw or break. By default, throw.
## max\_rows\_to\_read_leaf {#max-rows-to-read-leaf}
## max_rows_to_read_leaf {#max-rows-to-read-leaf}
The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be exceeded slightly.
@ -71,7 +71,7 @@ and each shard contains a table with 100 rows. Then distributed query which supp
tables with the setting `max_rows_to_read=150` will fail, as in total it will be 200 rows, while a query
with `max_rows_to_read_leaf=150` will succeed, since leaf nodes will read at most 100 rows.
## max\_bytes\_to\_read_leaf {#max-bytes-to-read-leaf}
## max_bytes_to_read_leaf {#max-bytes-to-read-leaf}
A maximum number of bytes (uncompressed data) that can be read from a local table on a leaf node when running
a distributed query. While distributed queries can issue multiple sub-queries to each shard (leaf), this limit will
@ -81,20 +81,20 @@ Then distributed query which suppose to read all the data from both tables with
as in total it will be 200 bytes, while a query with `max_bytes_to_read_leaf=150` will succeed, since leaf nodes will read
at most 100 bytes.
## read\_overflow\_mode_leaf {#read-overflow-mode-leaf}
## read_overflow_mode_leaf {#read-overflow-mode-leaf}
What to do when the volume of data read exceeds one of the leaf limits: throw or break. By default, throw.
## max\_rows\_to\_group\_by {#settings-max-rows-to-group-by}
## max_rows_to_group_by {#settings-max-rows-to-group-by}
A maximum number of unique keys received from aggregation. This setting lets you limit memory consumption when aggregating.
## group\_by\_overflow\_mode {#group-by-overflow-mode}
## group_by_overflow_mode {#group-by-overflow-mode}
What to do when the number of unique keys for aggregation exceeds the limit: throw, break, or any. By default, throw.
Using the any value lets you run an approximation of GROUP BY. The quality of this approximation depends on the statistical nature of the data.
## max\_bytes\_before\_external\_group\_by {#settings-max_bytes_before_external_group_by}
## max_bytes_before_external_group_by {#settings-max_bytes_before_external_group_by}
Enables or disables execution of `GROUP BY` clauses in external memory. See [GROUP BY in external memory](../../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory).
@ -105,31 +105,31 @@ Possible values:
Default value: 0.
## max\_rows\_to\_sort {#max-rows-to-sort}
## max_rows_to_sort {#max-rows-to-sort}
A maximum number of rows before sorting. This allows you to limit memory consumption when sorting.
## max\_bytes\_to\_sort {#max-bytes-to-sort}
## max_bytes_to_sort {#max-bytes-to-sort}
A maximum number of bytes before sorting.
## sort\_overflow\_mode {#sort-overflow-mode}
## sort_overflow_mode {#sort-overflow-mode}
What to do if the number of rows received before sorting exceeds one of the limits: throw or break. By default, throw.
## max\_result\_rows {#setting-max_result_rows}
## max_result_rows {#setting-max_result_rows}
Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query.
## max\_result\_bytes {#max-result-bytes}
## max_result_bytes {#max-result-bytes}
Limit on the number of bytes in the result. The same as the previous setting.
## result\_overflow\_mode {#result-overflow-mode}
## result_overflow_mode {#result-overflow-mode}
What to do if the volume of the result exceeds one of the limits: throw or break. By default, throw.
Using break is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max\_result\_rows](#setting-max_result_rows), multiple of [max\_block\_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max\_threads](../../operations/settings/settings.md#settings-max_threads).
Using break is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that the number of returned rows can be greater than [max_result_rows](#setting-max_result_rows), is a multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size), and depends on [max_threads](../../operations/settings/settings.md#settings-max_threads).
Example:
@ -148,103 +148,103 @@ Result:
6666 rows in set. ...
```
## max\_execution\_time {#max-execution-time}
## max_execution_time {#max-execution-time}
Maximum query execution time in seconds.
At this time, it is not checked during one of the sorting stages, or when merging and finalizing aggregate functions.
## timeout\_overflow\_mode {#timeout-overflow-mode}
## timeout_overflow_mode {#timeout-overflow-mode}
What to do if the query is run longer than max\_execution\_time: throw or break. By default, throw.
What to do if the query is run longer than max_execution_time: throw or break. By default, throw.
## min\_execution\_speed {#min-execution-speed}
## min_execution_speed {#min-execution-speed}
Minimal execution speed in rows per second. Checked on every data block when timeout\_before\_checking\_execution\_speed expires. If the execution speed is lower, an exception is thrown.
Minimal execution speed in rows per second. Checked on every data block when timeout_before_checking_execution_speed expires. If the execution speed is lower, an exception is thrown.
## min\_execution\_speed\_bytes {#min-execution-speed-bytes}
## min_execution_speed_bytes {#min-execution-speed-bytes}
A minimum number of execution bytes per second. Checked on every data block when timeout\_before\_checking\_execution\_speed expires. If the execution speed is lower, an exception is thrown.
A minimum number of execution bytes per second. Checked on every data block when timeout_before_checking_execution_speed expires. If the execution speed is lower, an exception is thrown.
## max\_execution\_speed {#max-execution-speed}
## max_execution_speed {#max-execution-speed}
A maximum number of execution rows per second. Checked on every data block when timeout\_before\_checking\_execution\_speed expires. If the execution speed is high, the execution speed will be reduced.
A maximum number of execution rows per second. Checked on every data block when timeout_before_checking_execution_speed expires. If the execution speed is higher, it will be reduced.
## max\_execution\_speed\_bytes {#max-execution-speed-bytes}
## max_execution_speed_bytes {#max-execution-speed-bytes}
A maximum number of execution bytes per second. Checked on every data block when timeout\_before\_checking\_execution\_speed expires. If the execution speed is high, the execution speed will be reduced.
A maximum number of execution bytes per second. Checked on every data block when timeout_before_checking_execution_speed expires. If the execution speed is higher, it will be reduced.
## timeout\_before\_checking\_execution\_speed {#timeout-before-checking-execution-speed}
## timeout_before_checking_execution_speed {#timeout-before-checking-execution-speed}
Checks that execution speed is not too slow (no less than min\_execution\_speed), after the specified time in seconds has expired.
Checks that execution speed is not too slow (no less than min_execution_speed), after the specified time in seconds has expired.
## max\_columns\_to\_read {#max-columns-to-read}
## max_columns_to_read {#max-columns-to-read}
A maximum number of columns that can be read from a table in a single query. If a query requires reading a greater number of columns, it throws an exception.
## max\_temporary\_columns {#max-temporary-columns}
## max_temporary_columns {#max-temporary-columns}
A maximum number of temporary columns that must be kept in RAM at the same time when running a query, including constant columns. If there are more temporary columns than this, it throws an exception.
## max\_temporary\_non\_const\_columns {#max-temporary-non-const-columns}
## max_temporary_non_const_columns {#max-temporary-non-const-columns}
The same thing as max\_temporary\_columns, but without counting constant columns.
The same thing as max_temporary_columns, but without counting constant columns.
Note that constant columns are formed fairly often when running a query, but they require approximately zero computing resources.
## max\_subquery\_depth {#max-subquery-depth}
## max_subquery_depth {#max-subquery-depth}
Maximum nesting depth of subqueries. If subqueries are deeper, an exception is thrown. By default, 100.
## max\_pipeline\_depth {#max-pipeline-depth}
## max_pipeline_depth {#max-pipeline-depth}
Maximum pipeline depth. Corresponds to the number of transformations that each data block goes through during query processing. Counted within the limits of a single server. If the pipeline depth is greater, an exception is thrown. By default, 1000.
## max\_ast\_depth {#max-ast-depth}
## max_ast_depth {#max-ast-depth}
Maximum nesting depth of a query syntactic tree. If exceeded, an exception is thrown.
At this time, it isn't checked during parsing, but only after parsing the query. That is, a syntactic tree that is too deep can be created during parsing, but the query will fail. By default, 1000.
## max_ast_elements {#max-ast-elements}
A maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown.
In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000.
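The effective values of these syntax-tree limits for the current session can be inspected in `system.settings`; a minimal sketch:

``` sql
-- Show the current values of the limits discussed above.
SELECT name, value
FROM system.settings
WHERE name IN ('max_subquery_depth', 'max_ast_depth', 'max_ast_elements')
```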
## max_rows_in_set {#max-rows-in-set}
A maximum number of rows for a data set in the IN clause created from a subquery.
## max_bytes_in_set {#max-bytes-in-set}
A maximum number of bytes (uncompressed data) used by a set in the IN clause created from a subquery.
## set_overflow_mode {#set-overflow-mode}
What to do when the amount of data exceeds one of the limits: throw or break. By default, throw.
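A sketch combining the set limits with the overflow mode; the table and column names are hypothetical:

``` sql
-- Cap the IN set built from the subquery at 1,000,000 rows and
-- stop filling it ('break') instead of throwing an exception.
SELECT count()
FROM hits
WHERE UserID IN (SELECT UserID FROM visits)
SETTINGS max_rows_in_set = 1000000, set_overflow_mode = 'break'
```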
## max_rows_in_distinct {#max-rows-in-distinct}
A maximum number of different rows when using DISTINCT.
## max_bytes_in_distinct {#max-bytes-in-distinct}
A maximum number of bytes used by a hash table when using DISTINCT.
## distinct_overflow_mode {#distinct-overflow-mode}
What to do when the amount of data exceeds one of the limits: throw or break. By default, throw.
## max_rows_to_transfer {#max-rows-to-transfer}
A maximum number of rows that can be passed to a remote server or saved in a temporary table when using GLOBAL IN.
## max_bytes_to_transfer {#max-bytes-to-transfer}
A maximum number of bytes (uncompressed data) that can be passed to a remote server or saved in a temporary table when using GLOBAL IN.
## transfer_overflow_mode {#transfer-overflow-mode}
What to do when the amount of data exceeds one of the limits: throw or break. By default, throw.
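An illustrative query bounding the temporary table that `GLOBAL IN` ships to remote servers; the distributed table names are hypothetical:

``` sql
-- Truncate ('break') the transferred set at 1,000,000 rows
-- instead of failing the whole query.
SELECT count()
FROM distributed_hits
WHERE UserID GLOBAL IN (SELECT UserID FROM distributed_visits)
SETTINGS max_rows_to_transfer = 1000000, transfer_overflow_mode = 'break'
```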
## max_rows_in_join {#settings-max_rows_in_join}
Limits the number of rows in the hash table that is used when joining tables.
@ -252,7 +252,7 @@ This setting applies to [SELECT … JOIN](../../sql-reference/statements/select
If a query contains multiple joins, ClickHouse checks this setting for every intermediate result.
ClickHouse can proceed with different actions when the limit is reached. Use the [join_overflow_mode](#settings-join_overflow_mode) setting to choose the action.
Possible values:
@ -261,7 +261,7 @@ Possible values:
Default value: 0.
## max_bytes_in_join {#settings-max_bytes_in_join}
Limits the size in bytes of the hash table used when joining tables.
@ -269,7 +269,7 @@ This setting applies to [SELECT … JOIN](../../sql-reference/statements/select
If the query contains joins, ClickHouse checks this setting for every intermediate result.
ClickHouse can proceed with different actions when the limit is reached. Use the [join_overflow_mode](#settings-join_overflow_mode) setting to choose the action.
Possible values:
@ -278,12 +278,12 @@ Possible values:
Default value: 0.
## join_overflow_mode {#settings-join_overflow_mode}
Defines what action ClickHouse performs when any of the following join limits is reached:
- [max_bytes_in_join](#settings-max_bytes_in_join)
- [max_rows_in_join](#settings-max_rows_in_join)
Possible values:
@ -297,7 +297,7 @@ Default value: `THROW`.
- [JOIN clause](../../sql-reference/statements/select/join.md#select-join)
- [Join table engine](../../engines/table-engines/special/join.md)
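A sketch of applying these join limits to a single query; the tables, columns, and values are illustrative:

``` sql
-- Cap the hash table built from the right-hand table at
-- 10,000,000 rows and return a partial join result ('break')
-- instead of failing.
SELECT h.UserID, v.Duration
FROM hits AS h
INNER JOIN visits AS v ON h.UserID = v.UserID
SETTINGS max_rows_in_join = 10000000, join_overflow_mode = 'break'
```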
## max_partitions_per_insert_block {#max-partitions-per-insert-block}
Limits the maximum number of partitions in a single inserted block.
@ -310,6 +310,6 @@ Default value: 100.
When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse throws an exception with the following text:
> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by max_partitions_per_insert_block setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
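If a bulk load legitimately needs to touch more partitions, the limit can be raised for the session; the value and the table names below are illustrative:

``` sql
-- Allow a one-off backfill INSERT to create up to 500 partitions
-- in a single block (hypothetical tables).
SET max_partitions_per_insert_block = 500;
INSERT INTO events SELECT * FROM staging_events;
```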
[Original article](https://clickhouse.tech/docs/en/operations/settings/query_complexity/) <!--hide-->

View File

@ -41,7 +41,7 @@ Structure of the `users` section:
</users>
```
### user_name/password {#user-namepassword}
Password can be specified in plaintext or in SHA256 (hex format).
@ -73,7 +73,7 @@ Password can be specified in plaintext or in SHA256 (hex format).
The first line of the result is the password. The second line is the corresponding double SHA1 hash.
### access_management {#access_management-user-setting}
This setting enables or disables the use of SQL-driven [access control and account management](../../operations/access-rights.md#access-control) for the user.
@ -84,7 +84,7 @@ Possible values:
Default value: 0.
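When this setting is enabled for an administrative user, accounts can be managed with SQL statements instead of `users.xml`; the user name, password, and grant below are examples only:

``` sql
-- Create an account and give it read access to one database.
CREATE USER analyst IDENTIFIED WITH sha256_password BY 'Str0ngPassw0rd';
GRANT SELECT ON default.* TO analyst;
```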
### user_name/networks {#user-namenetworks}
List of networks from which the user can connect to the ClickHouse server.
@ -126,18 +126,18 @@ To open access only from localhost, specify:
<ip>127.0.0.1</ip>
```
### user_name/profile {#user-nameprofile}
You can assign a settings profile for the user. Settings profiles are configured in a separate section of the `users.xml` file. For more information, see [Profiles of Settings](../../operations/settings/settings-profiles.md).
### user_name/quota {#user-namequota}
Quotas allow you to track or limit resource usage over a period of time. Quotas are configured in the `quotas`
section of the `users.xml` configuration file.
You can assign a quotas set for the user. For a detailed description of quotas configuration, see [Quotas](../../operations/quotas.md#quotas).
### user_name/databases {#user-namedatabases}
In this section, you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
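With SQL-driven access control enabled, row policies provide a comparable effect; the database, table, filter, and user below are hypothetical:

``` sql
-- Return to 'analyst' only the rows of mydb.hits that pass the
-- filter; everything here is an example, not the users.xml syntax.
CREATE ROW POLICY user_filter ON mydb.hits
FOR SELECT USING UserID = 1000 TO analyst;
```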

File diff suppressed because it is too large

View File

@ -33,6 +33,6 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metrics) <!--hide-->

View File

@ -20,7 +20,7 @@ Please note that `errors_count` is updated once per query to the cluster, but `e
**See also**
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap)
- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life)
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/clusters) <!--hide-->

View File

@ -26,9 +26,9 @@ SELECT * FROM system.events LIMIT 5
**See Also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/events) <!--hide-->

View File

@ -1,6 +1,6 @@
# system.graphite_retentions {#system-graphite-retentions}
Contains information about the [graphite_rollup](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) parameters that are used in tables with [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md) engines.
Columns:

View File

@ -20,7 +20,7 @@ System tables:
Most system tables store their data in RAM. The ClickHouse server creates such system tables at startup.
Unlike other system tables, the system tables [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), and [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) are served by the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in the storage filesystem. If you remove such a table from the filesystem, the ClickHouse server creates an empty one again at the time of the next data write. If the schema of a system table changed in a new release, ClickHouse renames the current table and creates a new one.
By default, table growth is unlimited. To control the size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. You can also use the partitioning feature of `MergeTree`-engine tables.
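As an illustration, one possible retention policy for such a MergeTree-backed log table; the interval is arbitrary and this is a sketch, not a recommended configuration:

``` sql
-- Keep query_log records for 30 days, dropping older parts.
ALTER TABLE system.query_log
    MODIFY TTL event_date + INTERVAL 30 DAY;
```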

View File

@ -49,7 +49,7 @@ CurrentMetric_DistributedFilesToInsert: 0
**See also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

View File

@ -33,9 +33,9 @@ SELECT * FROM system.metrics LIMIT 10
**See Also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metrics) <!--hide-->

View File

@ -1,6 +1,6 @@
# system.part_log {#system_tables-part-log}
The `system.part_log` table is created only if the [part_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-part-log) server setting is specified.
This table contains information about events that occurred with [data parts](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family tables, such as adding or merging data.

View File

@ -10,7 +10,7 @@ Columns:
- `rows_read` (UInt64) The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
- `bytes_read` (UInt64) The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
- `total_rows_approx` (UInt64) The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known.
- `memory_usage` (UInt64) Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
- `query` (String) – The query text. For `INSERT`, it doesn't include the data to insert.
- `query_id` (String) Query ID, if defined.

View File

@ -5,11 +5,11 @@ Contains information about executed queries, for example, start time, duration o
!!! note "Note"
    This table doesn't contain the ingested data for `INSERT` queries.
You can change the settings of query logging in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration.
You can disable query logging by setting [log_queries = 0](../../operations/settings/settings.md#settings-log-queries). We don't recommend turning off logging, because information in this table is important for solving issues.
The flushing period of data is set in the `flush_interval_milliseconds` parameter of the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse doesn't delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
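A short example: flush the in-memory log buffers, then look at the most recent finished queries (the columns shown are a small subset of the table):

``` sql
SYSTEM FLUSH LOGS;
SELECT event_time, query_duration_ms, query
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 5;
```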
@ -140,7 +140,7 @@ Settings.Values: ['0','random','1','10000000000','1']
**See Also**
- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_log) <!--hide-->

View File

@ -4,10 +4,10 @@ Contains information about threads which execute queries, for example, thread na
To start logging:
1. Configure parameters in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section.
2. Set [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1.
The flushing period of data is set in the `flush_interval_milliseconds` parameter of the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse doesn't delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
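A minimal sketch: enable thread logging for the current session, run a query, then inspect the newest records (the columns shown are a small subset):

``` sql
SET log_query_threads = 1;
SELECT thread_name, query_id, memory_usage
FROM system.query_thread_log
ORDER BY event_time DESC
LIMIT 5;
```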
@ -113,6 +113,6 @@ ProfileEvents.Values: [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47,
**See Also**
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_thread_log) <!--hide-->

View File

@ -63,7 +63,7 @@ Columns:
- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged.
- `zookeeper_path` (`String`) - Path to table data in ZooKeeper.
- `replica_name` (`String`) - Replica name in ZooKeeper. Different replicas of the same table have different names.
- `replica_path` (`String`) - Path to replica data in ZooKeeper. The same as concatenating zookeeper_path/replicas/replica_path.
- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven't made all of the ALTERs yet.
- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`.
- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong.
@ -84,7 +84,7 @@ The next 4 columns have a non-zero value only where there is an active session w
- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.
If you don't request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly.
For example, you can check that everything is working correctly like this:
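The original example query is truncated in this diff; the sketch below is in the same spirit, with arbitrary thresholds:

``` sql
-- List replicas that look unhealthy: read-only, lost ZooKeeper
-- session, or far behind on queued work.
SELECT database, table, replica_name
FROM system.replicas
WHERE is_readonly
   OR is_session_expired
   OR future_parts > 20
   OR parts_to_check > 10
   OR queue_size > 20
```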

View File

@ -1,4 +1,4 @@
# system.text_log {#system_tables-text_log}
Contains logging entries. The logging level that goes to this table can be limited with the `text_log.level` server setting.

View File

@ -2,7 +2,7 @@
Contains stack traces collected by the sampling query profiler.
ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
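For instance, a sketch that symbolizes the most frequent sampled stacks, assuming the introspection functions are allowed for the user:

``` sql
SET allow_introspection_functions = 1;
SELECT
    count() AS samples,
    arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS stack
FROM system.trace_log
GROUP BY trace
ORDER BY samples DESC
LIMIT 3;
```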
@ -27,7 +27,7 @@ Columns:
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table.
- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process.

View File

@ -57,7 +57,7 @@ When creating RAID-10, select the `far` layout.
If your budget allows, choose RAID-10.
If you have more than 4 disks, use RAID-6 (preferred) or RAID-50, instead of RAID-5.
When using RAID-5, RAID-6 or RAID-50, always increase stripe_cache_size, since the default value is usually not the best choice.
``` bash
$ echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size
```

View File

@ -103,7 +103,7 @@ Check:
- Endpoint settings.
Check [listen_host](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-listen_host) and [tcp_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) settings.
The ClickHouse server accepts only localhost connections by default.
@ -115,7 +115,7 @@ Check:
Check:
- The [tcp_port_secure](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting.
- Settings for [SSL certificates](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).
Use proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`.

View File

@ -492,6 +492,6 @@ Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >=
[Original article](https://clickhouse.tech/docs/en/query_language/agg_functions/parametric_functions/) <!--hide-->
## sumMapFiltered(keys_to_keep)(keys, values) {#summapfilteredkeys-to-keepkeys-values}
Same behavior as [sumMap](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys.
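For example, assuming the same hypothetical `sum_map` table with a `Nested` structure as in the `sumMap` examples:

``` sql
-- Sum the request counts only for status keys 1 and 4.
SELECT sumMapFiltered([1, 4])(statusMap.status, statusMap.requests)
FROM sum_map
```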

View File

@ -26,7 +26,7 @@ In both cases the type of the returned value is [UInt64](../../../sql-reference/
**Details**
ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count_distinct_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function.
The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it.
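For example, both of the following count unique values of a hypothetical `UserID` column; the first form obeys the setting, the second names the function explicitly:

``` sql
SELECT count(DISTINCT UserID) FROM hits;
SELECT uniqExact(UserID) FROM hits;
```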

View File

@ -34,8 +34,8 @@ The following table lists cases when query feature works in ClickHouse, but beha
| E021-01 | CHARACTER data type | No{.text-danger} | |
| E021-02 | CHARACTER VARYING data type | No{.text-danger} | `String` behaves similarly, but without length limit in parentheses |
| E021-03 | Character literals | Partial{.text-warning} | No automatic concatenation of consecutive literals and character set support |
| E021-04 | CHARACTER_LENGTH function | Partial{.text-warning} | No `USING` clause |
| E021-05 | OCTET_LENGTH function | No{.text-danger} | `LENGTH` behaves similarly |
| E021-06 | SUBSTRING | Partial{.text-warning} | No support for `SIMILAR` and `ESCAPE` clauses, no `SUBSTRING_REGEX` variant |
| E021-07 | Character concatenation | Partial{.text-warning} | No `COLLATE` clause |
| E021-08 | UPPER and LOWER functions | Yes{.text-success} | |
@ -152,7 +152,7 @@ The following table lists cases when query feature works in ClickHouse, but beha
| F051-03 | TIMESTAMP data type (including support of TIMESTAMP literal) with fractional seconds precision of at least 0 and 6 | No{.text-danger} | `DateTime64` time provides similar functionality |
| F051-04 | Comparison predicate on DATE, TIME, and TIMESTAMP data types | Partial{.text-warning} | Only one data type available |
| F051-05 | Explicit CAST between datetime types and character string types | Yes{.text-success} | |
| F051-06 | CURRENT_DATE | No{.text-danger} | `today()` is similar |
| F051-07 | LOCALTIME | No{.text-danger} | `now()` is similar |
| F051-08 | LOCALTIMESTAMP | No{.text-danger} | |
| **F081** | **UNION and EXCEPT in views** | **Partial**{.text-warning} | |

View File

@ -27,9 +27,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t
The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn't explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.
ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting, which is the `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
## Examples {#examples}
@ -120,6 +120,7 @@ FROM dt
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format)
- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format)
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
- [The `Date` data type](../../sql-reference/data-types/date.md)

Some files were not shown because too many files have changed in this diff