mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-17 21:24:28 +00:00
Merge branch 'master' into actions-dag
This commit is contained in:
commit
d9d83d8db6
1
.gitignore
vendored
1
.gitignore
vendored
@ -118,6 +118,7 @@ website/package-lock.json
|
||||
|
||||
# clangd cache
|
||||
/.clangd
|
||||
/.cache
|
||||
|
||||
/compile_commands.json
|
||||
|
||||
|
@ -17,4 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all
|
||||
|
||||
## Upcoming Events
|
||||
|
||||
* [ClickHouse online meetup (in Russian)](https://clck.ru/R2zB9) on October 1, 2020.
|
||||
* [ClickHouse virtual office hours](https://www.eventbrite.com/e/clickhouse-october-virtual-meetup-office-hours-tickets-123129500651) on October 22, 2020.
|
||||
|
@ -16,19 +16,6 @@ void trim(String & s)
|
||||
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end());
|
||||
}
|
||||
|
||||
// Uses separate replxx::Replxx instance to avoid loading them again in the
|
||||
// current context (replxx::Replxx::history_load() will re-load the history
|
||||
// from the file), since then they will overlap with history from the current
|
||||
// session (this will make behavior compatible with other interpreters, i.e.
|
||||
// bash).
|
||||
void history_save(const String & history_file_path, const String & line)
|
||||
{
|
||||
replxx::Replxx rx_no_overlap;
|
||||
rx_no_overlap.history_load(history_file_path);
|
||||
rx_no_overlap.history_add(line);
|
||||
rx_no_overlap.history_save(history_file_path);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ReplxxLineReader::ReplxxLineReader(
|
||||
@ -58,7 +45,10 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
}
|
||||
else
|
||||
{
|
||||
rx.history_load(history_file_path);
|
||||
if (!rx.history_load(history_file_path))
|
||||
{
|
||||
rx.print("Loading history failed: %s\n", strerror(errno));
|
||||
}
|
||||
|
||||
if (flock(history_file_fd, LOCK_UN))
|
||||
{
|
||||
@ -128,7 +118,8 @@ void ReplxxLineReader::addToHistory(const String & line)
|
||||
rx.history_add(line);
|
||||
|
||||
// flush changes to the disk
|
||||
history_save(history_file_path, line);
|
||||
if (!rx.history_save(history_file_path))
|
||||
rx.print("Saving history failed: %s\n", strerror(errno));
|
||||
|
||||
if (locked && 0 != flock(history_file_fd, LOCK_UN))
|
||||
rx.print("Unlock of history file failed: %s\n", strerror(errno));
|
||||
|
@ -313,13 +313,4 @@ namespace ZeroTraits
|
||||
}
|
||||
|
||||
|
||||
inline bool operator==(StringRef lhs, const char * rhs)
|
||||
{
|
||||
for (size_t pos = 0; pos < lhs.size; ++pos)
|
||||
if (!rhs[pos] || lhs.data[pos] != rhs[pos])
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & os, const StringRef & str);
|
||||
|
@ -57,8 +57,8 @@ if (SANITIZE)
|
||||
endif ()
|
||||
|
||||
elseif (SANITIZE STREQUAL "undefined")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt")
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
|
||||
endif()
|
||||
|
@ -15,6 +15,10 @@ if (COMPILER_GCC)
|
||||
elseif (COMPILER_CLANG)
|
||||
# Require minimum version of clang/apple-clang
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
|
||||
# If you are a developer, you can figure out which exact versions of AppleClang are OK,
|
||||
# remove the following line and commit changes below.
|
||||
message (FATAL_ERROR "AppleClang is not supported, you should install clang from brew.")
|
||||
|
||||
# AppleClang 10.0.1 (Xcode 10.2) corresponds to LLVM/Clang upstream version 7.0.0
|
||||
# AppleClang 11.0.0 (Xcode 11.0) corresponds to LLVM/Clang upstream version 8.0.0
|
||||
set (XCODE_MINIMUM_VERSION 10.2)
|
||||
|
@ -92,6 +92,11 @@ if (COMPILER_CLANG)
|
||||
no_warning(weak-template-vtables)
|
||||
no_warning(weak-vtables)
|
||||
|
||||
# XXX: libstdc++ has some of these for 3way compare
|
||||
if (NOT USE_LIBCXX)
|
||||
no_warning(zero-as-null-pointer-constant)
|
||||
endif()
|
||||
|
||||
# TODO Enable conversion, sign-conversion, double-promotion warnings.
|
||||
endif ()
|
||||
elseif (COMPILER_GCC)
|
||||
|
2
contrib/replxx
vendored
2
contrib/replxx
vendored
@ -1 +1 @@
|
||||
Subproject commit 94b1f568d16183214d26c7c0e9ce69a4ce407f65
|
||||
Subproject commit 8cf626c04e9a74313fb0b474cdbe2297c0f3cdc8
|
69
debian/clickhouse-server.init
vendored
69
debian/clickhouse-server.init
vendored
@ -153,82 +153,19 @@ initdb()
|
||||
|
||||
start()
|
||||
{
|
||||
[ -x $CLICKHOUSE_BINDIR/$PROGRAM ] || exit 0
|
||||
local EXIT_STATUS
|
||||
EXIT_STATUS=0
|
||||
|
||||
echo -n "Start $PROGRAM service: "
|
||||
|
||||
if is_running; then
|
||||
echo -n "already running "
|
||||
EXIT_STATUS=1
|
||||
else
|
||||
ulimit -n 262144
|
||||
mkdir -p $CLICKHOUSE_PIDDIR
|
||||
chown -R $CLICKHOUSE_USER:$CLICKHOUSE_GROUP $CLICKHOUSE_PIDDIR
|
||||
initdb
|
||||
if ! is_running; then
|
||||
# Lock should not be held while running the child process, so we release the lock. Note: obviously, there is a race condition.
|
||||
# But clickhouse-server has protection from simultaneous runs with same data directory.
|
||||
su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; $CLICKHOUSE_PROGRAM_ENV exec -a \"$PROGRAM\" \"$CLICKHOUSE_BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\""
|
||||
EXIT_STATUS=$?
|
||||
if [ $EXIT_STATUS -ne 0 ]; then
|
||||
return $EXIT_STATUS
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $EXIT_STATUS -eq 0 ]; then
|
||||
attempts=0
|
||||
while ! is_running && [ $attempts -le ${CLICKHOUSE_START_TIMEOUT:=10} ]; do
|
||||
attempts=$(($attempts + 1))
|
||||
sleep 1
|
||||
done
|
||||
if is_running; then
|
||||
echo "DONE"
|
||||
else
|
||||
echo "UNKNOWN"
|
||||
fi
|
||||
else
|
||||
echo "FAILED"
|
||||
fi
|
||||
|
||||
return $EXIT_STATUS
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} start --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
stop()
|
||||
{
|
||||
#local EXIT_STATUS
|
||||
EXIT_STATUS=0
|
||||
|
||||
if [ -f $CLICKHOUSE_PIDFILE ]; then
|
||||
|
||||
echo -n "Stop $PROGRAM service: "
|
||||
|
||||
kill -TERM $(cat "$CLICKHOUSE_PIDFILE")
|
||||
|
||||
if ! wait_for_done ${CLICKHOUSE_STOP_TIMEOUT}; then
|
||||
EXIT_STATUS=2
|
||||
echo "TIMEOUT"
|
||||
else
|
||||
echo "DONE"
|
||||
fi
|
||||
|
||||
fi
|
||||
return $EXIT_STATUS
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} stop --pid-path "${CLICKHOUSE_PIDDIR}"
|
||||
}
|
||||
|
||||
|
||||
restart()
|
||||
{
|
||||
check_config
|
||||
if stop; then
|
||||
if start; then
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
return 1
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} restart --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
|
||||
}
|
||||
|
||||
|
||||
|
104
debian/clickhouse-server.postinst
vendored
104
debian/clickhouse-server.postinst
vendored
@ -2,6 +2,7 @@
|
||||
set -e
|
||||
# set -x
|
||||
|
||||
PROGRAM=clickhouse-server
|
||||
CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse}
|
||||
CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}}
|
||||
# Please note that we don't support paths with whitespaces. This is rather ignorant.
|
||||
@ -12,6 +13,7 @@ CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
|
||||
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
|
||||
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
|
||||
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
|
||||
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
|
||||
|
||||
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
|
||||
[ -f /etc/default/clickhouse ] && . /etc/default/clickhouse
|
||||
@ -41,105 +43,5 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Make sure the administrative user exists
|
||||
if ! getent passwd ${CLICKHOUSE_USER} > /dev/null; then
|
||||
if [ -n "$not_deb_os" ]; then
|
||||
useradd -r -s /bin/false --home-dir /nonexistent ${CLICKHOUSE_USER} > /dev/null
|
||||
else
|
||||
adduser --system --disabled-login --no-create-home --home /nonexistent \
|
||||
--shell /bin/false --group --gecos "ClickHouse server" ${CLICKHOUSE_USER} > /dev/null
|
||||
fi
|
||||
fi
|
||||
|
||||
# if the user was created manually, make sure the group is there as well
|
||||
if ! getent group ${CLICKHOUSE_GROUP} > /dev/null; then
|
||||
groupadd -r ${CLICKHOUSE_GROUP} > /dev/null
|
||||
fi
|
||||
|
||||
# make sure user is in the correct group
|
||||
if ! id -Gn ${CLICKHOUSE_USER} | grep -qw ${CLICKHOUSE_USER}; then
|
||||
usermod -a -G ${CLICKHOUSE_GROUP} ${CLICKHOUSE_USER} > /dev/null
|
||||
fi
|
||||
|
||||
# check validity of user and group
|
||||
if [ "$(id -u ${CLICKHOUSE_USER})" -eq 0 ]; then
|
||||
echo "The ${CLICKHOUSE_USER} system user must not have uid 0 (root).
|
||||
Please fix this and reinstall this package." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$(id -g ${CLICKHOUSE_GROUP})" -eq 0 ]; then
|
||||
echo "The ${CLICKHOUSE_USER} system user must not have root as primary group.
|
||||
Please fix this and reinstall this package." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ] && [ -f "$CLICKHOUSE_CONFIG" ]; then
|
||||
if [ -z "$SHELL" ]; then
|
||||
SHELL="/bin/sh"
|
||||
fi
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") ||:
|
||||
echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}"
|
||||
fi
|
||||
CLICKHOUSE_DATADIR_FROM_CONFIG=${CLICKHOUSE_DATADIR_FROM_CONFIG:=$CLICKHOUSE_DATADIR}
|
||||
|
||||
if [ ! -d ${CLICKHOUSE_DATADIR_FROM_CONFIG} ]; then
|
||||
mkdir -p ${CLICKHOUSE_DATADIR_FROM_CONFIG}
|
||||
chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR_FROM_CONFIG}
|
||||
chmod 700 ${CLICKHOUSE_DATADIR_FROM_CONFIG}
|
||||
fi
|
||||
|
||||
if [ -d ${CLICKHOUSE_CONFDIR} ]; then
|
||||
mkdir -p ${CLICKHOUSE_CONFDIR}/users.d
|
||||
mkdir -p ${CLICKHOUSE_CONFDIR}/config.d
|
||||
rm -fv ${CLICKHOUSE_CONFDIR}/*-preprocessed.xml ||:
|
||||
fi
|
||||
|
||||
[ -e ${CLICKHOUSE_CONFDIR}/preprocessed ] || ln -s ${CLICKHOUSE_DATADIR_FROM_CONFIG}/preprocessed_configs ${CLICKHOUSE_CONFDIR}/preprocessed ||:
|
||||
|
||||
if [ ! -d ${CLICKHOUSE_LOGDIR} ]; then
|
||||
mkdir -p ${CLICKHOUSE_LOGDIR}
|
||||
chown root:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}
|
||||
# Allow everyone to read logs, root and clickhouse to read-write
|
||||
chmod 775 ${CLICKHOUSE_LOGDIR}
|
||||
fi
|
||||
|
||||
# Set net_admin capabilities to support introspection of "taskstats" performance metrics from the kernel
|
||||
# and ipc_lock capabilities to allow mlock of clickhouse binary.
|
||||
|
||||
# 1. Check that "setcap" tool exists.
|
||||
# 2. Check that an arbitrary program with installed capabilities can run.
|
||||
# 3. Set the capabilities.
|
||||
|
||||
# The second is important for Docker and systemd-nspawn.
|
||||
# When the container has no capabilities,
|
||||
# but the executable file inside the container has capabilities,
|
||||
# then attempt to run this file will end up with a cryptic "Operation not permitted" message.
|
||||
|
||||
TMPFILE=/tmp/test_setcap.sh
|
||||
|
||||
command -v setcap >/dev/null \
|
||||
&& echo > $TMPFILE && chmod a+x $TMPFILE && $TMPFILE && setcap "cap_net_admin,cap_ipc_lock,cap_sys_nice+ep" $TMPFILE && $TMPFILE && rm $TMPFILE \
|
||||
&& setcap "cap_net_admin,cap_ipc_lock,cap_sys_nice+ep" "${CLICKHOUSE_BINDIR}/${CLICKHOUSE_GENERIC_PROGRAM}" \
|
||||
|| echo "Cannot set 'net_admin' or 'ipc_lock' or 'sys_nice' capability for clickhouse binary. This is optional. Taskstats accounting will be disabled. To enable taskstats accounting you may add the required capability later manually."
|
||||
|
||||
# Clean old dynamic compilation results
|
||||
if [ -d "${CLICKHOUSE_DATADIR_FROM_CONFIG}/build" ]; then
|
||||
rm -f ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.cpp ${CLICKHOUSE_DATADIR_FROM_CONFIG}/build/*.so ||:
|
||||
fi
|
||||
|
||||
if [ -f /usr/share/debconf/confmodule ]; then
|
||||
db_get clickhouse-server/default-password
|
||||
defaultpassword="$RET"
|
||||
if [ -n "$defaultpassword" ]; then
|
||||
echo "<yandex><users><default><password>$defaultpassword</password></default></users></yandex>" > ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
|
||||
chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
|
||||
chmod 600 ${CLICKHOUSE_CONFDIR}/users.d/default-password.xml
|
||||
fi
|
||||
|
||||
# everything went well, so now let's reset the password
|
||||
db_set clickhouse-server/default-password ""
|
||||
# ... done with debconf here
|
||||
db_stop
|
||||
fi
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
|
||||
fi
|
||||
|
@ -31,14 +31,10 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \
|
||||
&& chmod +x dpkg-deb \
|
||||
&& cp dpkg-deb /usr/bin
|
||||
|
||||
ENV APACHE_PUBKEY_HASH="bba6987b63c63f710fd4ed476121c588bc3812e99659d27a855f8c4d312783ee66ad6adfce238765691b04d62fa3688f"
|
||||
|
||||
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
|
||||
&& wget -nv -O /tmp/arrow-keyring.deb "https://apache.bintray.com/arrow/ubuntu/apache-arrow-archive-keyring-latest-${CODENAME}.deb" \
|
||||
&& echo "${APACHE_PUBKEY_HASH} /tmp/arrow-keyring.deb" | sha384sum -c \
|
||||
&& dpkg -i /tmp/arrow-keyring.deb
|
||||
|
||||
|
||||
# Libraries from OS are only needed to test the "unbundled" build (this is not used in production).
|
||||
RUN apt-get update \
|
||||
&& apt-get install \
|
||||
|
@ -52,6 +52,7 @@ RUN apt-get update \
|
||||
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment; \
|
||||
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \
|
||||
echo "MSAN_OPTIONS='abort_on_error=1'" >> /etc/environment; \
|
||||
echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment; \
|
||||
ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
|
||||
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
|
||||
# (but w/o verbosity for TSAN, otherwise test.reference will not match)
|
||||
|
@ -56,7 +56,6 @@ RUN apt-get update \
|
||||
python3-lxml \
|
||||
python3-requests \
|
||||
python3-termcolor \
|
||||
qemu-user-static \
|
||||
rename \
|
||||
software-properties-common \
|
||||
tzdata \
|
||||
|
@ -33,6 +33,12 @@ server_pid=none
|
||||
|
||||
function stop_server
|
||||
{
|
||||
if ! kill -0 -- "$server_pid"
|
||||
then
|
||||
echo "ClickHouse server pid '$server_pid' is not running"
|
||||
return 0
|
||||
fi
|
||||
|
||||
for _ in {1..60}
|
||||
do
|
||||
if ! pkill -f "clickhouse-server" && ! kill -- "$server_pid" ; then break ; fi
|
||||
@ -168,8 +174,8 @@ clickhouse-test --help
|
||||
|
||||
mkdir -p "$FASTTEST_DATA"{,/client-config}
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/"{config,users}.xml "$FASTTEST_DATA"
|
||||
"$FASTTEST_SOURCE/tests/config/install.sh" "$FASTTEST_DATA" "$FASTTEST_DATA/client-config"
|
||||
cp -a "$FASTTEST_SOURCE/programs/server/config.d/log_to_console.xml" "$FASTTEST_DATA/config.d"
|
||||
# doesn't support SSL
|
||||
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
||||
}
|
||||
@ -185,63 +191,67 @@ stop_server ||:
|
||||
start_server
|
||||
|
||||
TESTS_TO_SKIP=(
|
||||
parquet
|
||||
avro
|
||||
h3
|
||||
odbc
|
||||
mysql
|
||||
sha256
|
||||
_orc_
|
||||
arrow
|
||||
01098_temporary_and_external_tables
|
||||
01083_expressions_in_engine_arguments
|
||||
hdfs
|
||||
00911_tautological_compare
|
||||
protobuf
|
||||
capnproto
|
||||
java_hash
|
||||
hashing
|
||||
secure
|
||||
00490_special_line_separators_and_characters_outside_of_bmp
|
||||
00436_convert_charset
|
||||
00105_shard_collations
|
||||
01354_order_by_tuple_collate_const
|
||||
01292_create_user
|
||||
01098_msgpack_format
|
||||
00929_multi_match_edit_distance
|
||||
00926_multimatch
|
||||
00834_cancel_http_readonly_queries_on_client_close
|
||||
brotli
|
||||
parallel_alter
|
||||
00109_shard_totals_after_having
|
||||
00110_external_sort
|
||||
00302_http_compression
|
||||
00417_kill_query
|
||||
01294_lazy_database_concurrent
|
||||
01193_metadata_loading
|
||||
base64
|
||||
01031_mutations_interpreter_and_context
|
||||
json
|
||||
client
|
||||
01305_replica_create_drop_zookeeper
|
||||
01092_memory_profiler
|
||||
01355_ilike
|
||||
01281_unsucceeded_insert_select_queries_counter
|
||||
live_view
|
||||
limit_memory
|
||||
memory_limit
|
||||
memory_leak
|
||||
00110_external_sort
|
||||
00436_convert_charset
|
||||
00490_special_line_separators_and_characters_outside_of_bmp
|
||||
00652_replicated_mutations_zookeeper
|
||||
00682_empty_parts_merge
|
||||
00701_rollup
|
||||
00109_shard_totals_after_having
|
||||
ddl_dictionaries
|
||||
00834_cancel_http_readonly_queries_on_client_close
|
||||
00911_tautological_compare
|
||||
00926_multimatch
|
||||
00929_multi_match_edit_distance
|
||||
01031_mutations_interpreter_and_context
|
||||
01053_ssd_dictionary # this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled
|
||||
01083_expressions_in_engine_arguments
|
||||
01092_memory_profiler
|
||||
01098_msgpack_format
|
||||
01098_temporary_and_external_tables
|
||||
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- inconvenient when running locally
|
||||
01193_metadata_loading
|
||||
01238_http_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
|
||||
01251_dict_is_in_infinite_loop
|
||||
01259_dictionary_custom_settings_ddl
|
||||
01268_dictionary_direct_layout
|
||||
01280_ssd_complex_key_dictionary
|
||||
00652_replicated_mutations_zookeeper
|
||||
01411_bayesian_ab_testing
|
||||
01238_http_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
|
||||
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
|
||||
01318_encrypt # Depends on OpenSSL
|
||||
01318_decrypt # Depends on OpenSSL
|
||||
01281_unsucceeded_insert_select_queries_counter
|
||||
01292_create_user
|
||||
01294_lazy_database_concurrent
|
||||
01305_replica_create_drop_zookeeper
|
||||
01354_order_by_tuple_collate_const
|
||||
01355_ilike
|
||||
01411_bayesian_ab_testing
|
||||
_orc_
|
||||
arrow
|
||||
avro
|
||||
base64
|
||||
brotli
|
||||
capnproto
|
||||
client
|
||||
ddl_dictionaries
|
||||
h3
|
||||
hashing
|
||||
hdfs
|
||||
java_hash
|
||||
json
|
||||
limit_memory
|
||||
live_view
|
||||
memory_leak
|
||||
memory_limit
|
||||
mysql
|
||||
odbc
|
||||
parallel_alter
|
||||
parquet
|
||||
protobuf
|
||||
secure
|
||||
sha256
|
||||
|
||||
# Not sure why these two fail even in sequential mode. Disabled for now
|
||||
# to make some progress.
|
||||
@ -252,7 +262,7 @@ TESTS_TO_SKIP=(
|
||||
01460_DistributedFilesToInsert
|
||||
)
|
||||
|
||||
time clickhouse-test -j 8 --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
||||
# substr is to remove semicolon after test name
|
||||
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
|
||||
@ -269,13 +279,13 @@ then
|
||||
stop_server ||:
|
||||
|
||||
# Clean the data so that there is no interference from the previous test run.
|
||||
rm -rf "$FASTTEST_DATA"/{meta,}data ||:
|
||||
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
|
||||
|
||||
start_server
|
||||
|
||||
echo "Going to run again: ${FAILED_TESTS[*]}"
|
||||
|
||||
clickhouse-test --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
|
||||
clickhouse-test --order=random --no-long --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a "$FASTTEST_OUTPUT/test_log.txt"
|
||||
else
|
||||
echo "No failed tests"
|
||||
fi
|
||||
@ -329,7 +339,10 @@ case "$stage" in
|
||||
;&
|
||||
"run_tests")
|
||||
run_tests
|
||||
;&
|
||||
;;
|
||||
*)
|
||||
echo "Unknown test stage '$stage'"
|
||||
exit 1
|
||||
esac
|
||||
|
||||
pstree -apgT
|
||||
|
@ -37,7 +37,28 @@ RUN apt-get update \
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
RUN python3 -m pip install urllib3==1.23 pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout minio grpcio grpcio-tools cassandra-driver confluent-kafka avro
|
||||
RUN python3 -m pip install \
|
||||
PyMySQL \
|
||||
aerospike \
|
||||
avro \
|
||||
cassandra-driver \
|
||||
confluent-kafka \
|
||||
dicttoxml \
|
||||
docker \
|
||||
docker-compose==1.22.0 \
|
||||
grpcio \
|
||||
grpcio-tools \
|
||||
kafka-python \
|
||||
kazoo \
|
||||
minio \
|
||||
protobuf \
|
||||
psycopg2-binary==2.7.5 \
|
||||
pymongo \
|
||||
pytest \
|
||||
pytest-timeout \
|
||||
redis \
|
||||
tzlocal \
|
||||
urllib3
|
||||
|
||||
ENV DOCKER_CHANNEL stable
|
||||
ENV DOCKER_VERSION 17.09.1-ce
|
||||
|
@ -48,12 +48,13 @@ This table shows queries that take significantly longer to process on the client
|
||||
#### Unexpected Query Duration
|
||||
Action required for every item -- these are errors that must be fixed.
|
||||
|
||||
Queries that have a "short" duration (on the order of 0.1 s) can't be reliably tested in the normal way, where we perform a small number (about ten) of measurements for each server, because the signal-to-noise ratio is much smaller. There is a special mode for such queries that instead runs them for a fixed amount of time, normally with a much higher number of measurements (up to thousands). This mode must be explicitly enabled by the test author to avoid accidental errors. It must be used only for queries that are meant to complete "immediately", such as `select count(*)`. If your query is not supposed to be "immediate", try to make it run longer, by e.g. processing more data.
|
||||
A query is supposed to run longer than 0.1 second. If your query runs faster, increase the amount of processed data to bring the run time above this threshold. You can use a bigger table (e.g. `hits_100m` instead of `hits_10m`), increase a `LIMIT`, make a query single-threaded, and so on. Queries that are too fast suffer from poor stability and precision.
|
||||
|
||||
This table shows queries for which the "short" marking is not consistent with the actual query run time -- i.e., a query runs for a long time but is marked as short, or it runs very fast but is not marked as short.
|
||||
Sometimes you want to test a query that is supposed to complete "instantaneously", i.e. in sublinear time. This might be `count(*)`, or parsing a complicated tuple. It might not be practical or even possible to increase the run time of such queries by adding more data. For such queries there is a special comparison mode which runs them for a fixed amount of time, instead of a fixed number of iterations like we do normally. This mode is inferior to the normal mode, because the influence of noise and overhead is higher, which leads to less precise and stable results.
|
||||
|
||||
If your query is really supposed to complete "immediately" and can't be made to run longer, you have to mark it as "short". To do so, write `<query short="1">...` in the test file. The value of "short" attribute is evaluated as a python expression, and substitutions are performed, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>`, to mark only a particular combination of variables as short.
|
||||
If it is impossible to increase the run time of a query and it is supposed to complete "immediately", you have to explicitly mark this in the test. To do so, add a `short` attribute to the query tag in the test file: `<query short="1">...`. The value of the `short` attribute is evaluated as a python expression, and substitutions are performed, so you can write something like `<query short="{column1} = {column2}">select count(*) from table where {column1} > {column2}</query>`, to mark only a particular combination of variables as short.
|
||||
|
||||
This table shows queries for which the `short` marking is not consistent with the actual query run time -- i.e., a query runs for a normal time but is marked as `short`, or it runs faster than normal but is not marked as `short`.
|
||||
|
||||
#### Partial Queries
|
||||
Action required for the cells marked in red.
|
||||
|
@ -468,14 +468,14 @@ if args.report == 'main':
|
||||
return
|
||||
|
||||
columns = [
|
||||
'Test', #0
|
||||
'Wall clock time, s', #1
|
||||
'Total client time, s', #2
|
||||
'Total queries', #3
|
||||
'Longest query<br>(sum for all runs), s', #4
|
||||
'Avg wall clock time<br>(sum for all runs), s', #5
|
||||
'Shortest query<br>(sum for all runs), s', #6
|
||||
'', # Runs #7
|
||||
'Test', #0
|
||||
'Wall clock time, entire test, s', #1
|
||||
'Total client time for measured query runs, s', #2
|
||||
'Queries', #3
|
||||
'Longest query, total for measured runs, s', #4
|
||||
'Wall clock time per query, s', #5
|
||||
'Shortest query, total for measured runs, s', #6
|
||||
'', # Runs #7
|
||||
]
|
||||
attrs = ['' for c in columns]
|
||||
attrs[7] = None
|
||||
|
@ -48,4 +48,8 @@ if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test ; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
# We can have several additional options, so we pass them as an array because it's
|
||||
# more idiomatically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
@ -105,7 +105,11 @@ if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
# We can have several additional options, so we pass them as an array because it's
|
||||
# more idiomatically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
LLVM_PROFILE_FILE='client_%h_%p_%m.profraw' clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
||||
kill_clickhouse
|
||||
|
||||
|
@ -33,5 +33,8 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
ENV TZ=Europe/Moscow
|
||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||
|
||||
ENV NUM_TRIES=1
|
||||
ENV MAX_RUN_TIME=0
|
||||
|
||||
COPY run.sh /
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
@ -1,6 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e -x
|
||||
# fail on errors, verbose and export all env variables
|
||||
set -e -x -a
|
||||
|
||||
dpkg -i package_folder/clickhouse-common-static_*.deb
|
||||
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
|
||||
@ -16,5 +17,17 @@ service clickhouse-server start && sleep 5
|
||||
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
# We can have several additional options, so we pass them as an array because it's
|
||||
# more idiomatically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
function run_tests()
|
||||
{
|
||||
for i in $(seq 1 $NUM_TRIES); do
|
||||
clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt
|
||||
done
|
||||
}
|
||||
|
||||
export -f run_tests
|
||||
|
||||
timeout $MAX_RUN_TIME bash -c run_tests ||:
|
||||
|
@ -51,7 +51,11 @@ if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
|
||||
SKIP_LIST_OPT="--use-skip-list"
|
||||
fi
|
||||
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
# We can have several additional options, so we pass them as an array because it's
|
||||
# more idiomatically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
LLVM_PROFILE_FILE='client_coverage.profraw' clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
||||
kill_clickhouse
|
||||
|
||||
|
@ -45,7 +45,7 @@ function start()
|
||||
# for clickhouse-server (via service)
|
||||
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
|
||||
# for clickhouse-client
|
||||
export ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'
|
||||
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
|
||||
|
||||
start
|
||||
|
||||
|
@ -28,8 +28,18 @@ def get_options(i):
|
||||
options = ""
|
||||
if 0 < i:
|
||||
options += " --order=random"
|
||||
|
||||
if i % 2 == 1:
|
||||
options += " --db-engine=Ordinary"
|
||||
|
||||
# If database name is not specified, new database is created for each functional test.
|
||||
# Run some threads with one database for all tests.
|
||||
if i % 3 == 1:
|
||||
options += " --database=test_{}".format(i)
|
||||
|
||||
if i == 13:
|
||||
options += " --client-option='memory_tracker_fault_probability=0.00001'"
|
||||
|
||||
return options
|
||||
|
||||
|
||||
|
@ -45,7 +45,7 @@ A `Block` is a container that represents a subset (chunk) of a table in memory.
|
||||
|
||||
When we calculate some function over columns in a block, we add another column with its result to the block, and we don’t touch columns for arguments of the function because operations are immutable. Later, unneeded columns can be removed from the block, but not modified. It is convenient for the elimination of common subexpressions.
|
||||
|
||||
Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks, and only column data changes. It is better to split block data from the block header because small block sizes have a high overhead of temporary strings for copying shared\_ptrs and column names.
|
||||
Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks, and only column data changes. It is better to split block data from the block header because small block sizes have a high overhead of temporary strings for copying shared_ptrs and column names.
|
||||
|
||||
## Block Streams {#block-streams}
|
||||
|
||||
|
@ -7,7 +7,7 @@ toc_title: Build on Linux
|
||||
|
||||
Supported platforms:
|
||||
|
||||
- x86\_64
|
||||
- x86_64
|
||||
- AArch64
|
||||
- Power9 (experimental)
|
||||
|
||||
|
@ -26,7 +26,7 @@ toc_title: Third-Party Libraries Used
|
||||
| libpcg-random | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libpcg-random/LICENSE-APACHE.txt) |
|
||||
| libressl | [OpenSSL License](https://github.com/ClickHouse-Extras/ssl/blob/master/COPYING) |
|
||||
| librdkafka | [BSD 2-Clause License](https://github.com/edenhill/librdkafka/blob/363dcad5a23dc29381cc626620e68ae418b3af19/LICENSE) |
|
||||
| libwidechar\_width | [CC0 1.0 Universal](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) |
|
||||
| libwidechar_width | [CC0 1.0 Universal](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) |
|
||||
| llvm | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/llvm/blob/163def217817c90fb982a6daf384744d8472b92b/llvm/LICENSE.TXT) |
|
||||
| lz4 | [BSD 2-Clause License](https://github.com/lz4/lz4/blob/c10863b98e1503af90616ae99725ecd120265dfb/LICENSE) |
|
||||
| mariadb-connector-c | [LGPL v2.1](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/3.1/COPYING.LIB) |
|
||||
|
@ -40,7 +40,7 @@ In the command line terminal run:
|
||||
git clone --recursive git@github.com:your_github_username/ClickHouse.git
|
||||
cd ClickHouse
|
||||
|
||||
Note: please, substitute *your\_github\_username* with what is appropriate!
|
||||
Note: please, substitute *your_github_username* with what is appropriate!
|
||||
|
||||
This command will create a directory `ClickHouse` containing the working copy of the project.
|
||||
|
||||
@ -150,7 +150,7 @@ Now that you are ready to build ClickHouse we recommend you to create a separate
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
You can have several different directories (build\_release, build\_debug, etc.) for different types of build.
|
||||
You can have several different directories (build_release, build_debug, etc.) for different types of build.
|
||||
|
||||
While inside the `build` directory, configure your build by running CMake. Before the first run, you need to define environment variables that specify compiler (version 9 gcc compiler in this example).
|
||||
|
||||
|
@ -354,7 +354,7 @@ In all other cases, use a name that describes the meaning.
|
||||
bool info_successfully_loaded = false;
|
||||
```
|
||||
|
||||
**9.** Names of `define`s and global constants use ALL\_CAPS with underscores.
|
||||
**9.** Names of `define`s and global constants use ALL_CAPS with underscores.
|
||||
|
||||
``` cpp
|
||||
#define MAX_SRC_TABLE_NAMES_TO_STORE 1000
|
||||
@ -394,7 +394,7 @@ The underscore suffix can be omitted if the argument is not used in the construc
|
||||
timer (not m_timer)
|
||||
```
|
||||
|
||||
**14.** For the constants in an `enum`, use CamelCase with a capital letter. ALL\_CAPS is also acceptable. If the `enum` is non-local, use an `enum class`.
|
||||
**14.** For the constants in an `enum`, use CamelCase with a capital letter. ALL_CAPS is also acceptable. If the `enum` is non-local, use an `enum class`.
|
||||
|
||||
``` cpp
|
||||
enum class CompressionMethod
|
||||
@ -707,7 +707,7 @@ The standard library is used (`libc++`).
|
||||
|
||||
**4.** OS: Linux Ubuntu, not older than Precise.
|
||||
|
||||
**5.**Code is written for x86\_64 CPU architecture.
|
||||
**5.** Code is written for x86_64 CPU architecture.
|
||||
|
||||
The CPU instruction set is the minimum supported set among our servers. Currently, it is SSE 4.2.
|
||||
|
||||
|
@ -220,7 +220,7 @@ Debug version of `jemalloc` is used for debug build.
|
||||
ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries.
|
||||
All the fuzz testing should be performed with sanitizers (Address and Undefined).
|
||||
|
||||
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “\_fuzzer” name postfixes.
|
||||
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “_fuzzer” name postfixes.
|
||||
Fuzzer example can be found at `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`.
|
||||
We encourage you to write fuzz tests for every functionality that handles user input.
|
||||
|
||||
|
@ -71,12 +71,12 @@ Constructions with `{}` are similar to the [remote](../../../sql-reference/table
|
||||
|
||||
1. Suppose we have several files in TSV format with the following URIs on HDFS:
|
||||
|
||||
- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_1’
|
||||
- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_2’
|
||||
- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_3’
|
||||
- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_1’
|
||||
- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_2’
|
||||
- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_3’
|
||||
- ‘hdfs://hdfs1:9000/some_dir/some_file_1’
|
||||
- ‘hdfs://hdfs1:9000/some_dir/some_file_2’
|
||||
- ‘hdfs://hdfs1:9000/some_dir/some_file_3’
|
||||
- ‘hdfs://hdfs1:9000/another_dir/some_file_1’
|
||||
- ‘hdfs://hdfs1:9000/another_dir/some_file_2’
|
||||
- ‘hdfs://hdfs1:9000/another_dir/some_file_3’
|
||||
|
||||
1. There are several ways to make a table consisting of all six files:
|
||||
|
||||
|
@ -134,7 +134,7 @@ Example:
|
||||
SELECT level, sum(total) FROM daily GROUP BY level;
|
||||
```
|
||||
|
||||
To improve performance, received messages are grouped into blocks the size of [max\_insert\_block\_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasn’t formed within [stream\_flush\_interval\_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
|
||||
To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
|
||||
|
||||
To stop receiving topic data or to change the conversion logic, detach the materialized view:
|
||||
|
||||
@ -192,6 +192,6 @@ Example:
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
|
||||
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->
|
||||
|
@ -109,7 +109,7 @@ Setting `rabbitmq_queue_base` may be used for the following cases:
|
||||
- to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables.
|
||||
- to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.)
|
||||
|
||||
To improve performance, received messages are grouped into blocks the size of [max\_insert\_block\_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasn’t formed within [stream\_flush\_interval\_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
|
||||
To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
|
||||
|
||||
If `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings are specified along with `rabbitmq_exchange_type`, then:
|
||||
|
||||
|
@ -114,7 +114,7 @@ drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 12:09 201902_4_6_1
|
||||
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached
|
||||
```
|
||||
|
||||
The folders ‘201901\_1\_1\_0’, ‘201901\_1\_7\_1’ and so on are the directories of the parts. Each part relates to a corresponding partition and contains data just for a certain month (the table in this example has partitioning by month).
|
||||
The folders ‘201901_1_1_0’, ‘201901_1_7_1’ and so on are the directories of the parts. Each part relates to a corresponding partition and contains data just for a certain month (the table in this example has partitioning by month).
|
||||
|
||||
The `detached` directory contains parts that were detached from the table using the [DETACH](../../../sql-reference/statements/alter/partition.md#alter_detach-partition) query. The corrupted parts are also moved to this directory, instead of being deleted. The server does not use the parts from the `detached` directory. You can add, delete, or modify the data in this directory at any time – the server will not know about this until you run the [ATTACH](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query.
|
||||
|
||||
|
@ -79,7 +79,7 @@ All of the parameters excepting `config_section` have the same meaning as in `Me
|
||||
|
||||
## Rollup Configuration {#rollup-configuration}
|
||||
|
||||
The settings for rollup are defined by the [graphite\_rollup](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) parameter in the server configuration. The name of the parameter could be any. You can create several configurations and use them for different tables.
|
||||
The settings for rollup are defined by the [graphite_rollup](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) parameter in the server configuration. The parameter can have any name. You can create several configurations and use them for different tables.
|
||||
|
||||
Rollup configuration structure:
|
||||
|
||||
|
@ -205,7 +205,7 @@ The number of columns in the primary key is not explicitly limited. Depending on
|
||||
|
||||
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.
|
||||
|
||||
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max\_insert\_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
|
||||
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
|
||||
|
||||
To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries.
|
||||
|
||||
@ -248,7 +248,7 @@ In the example below, the index can’t be used.
|
||||
SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
|
||||
```
|
||||
|
||||
To check whether ClickHouse can use the index when running a query, use the settings [force\_index\_by\_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force\_primary\_key](../../../operations/settings/settings.md).
|
||||
To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force_primary_key](../../../operations/settings/settings.md).
|
||||
|
||||
The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date.
|
||||
|
||||
@ -339,7 +339,7 @@ Conditions in the `WHERE` clause contains calls of the functions that operate wi
|
||||
|
||||
The `set` index can be used with all functions. Function subsets for other indexes are shown in the table below.
|
||||
|
||||
| Function (operator) / Index | primary key | minmax | ngrambf\_v1 | tokenbf\_v1 | bloom\_filter |
|
||||
| Function (operator) / Index | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter |
|
||||
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
|
||||
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
@ -505,7 +505,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be
|
||||
- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
|
||||
- Storage policy — Set of volumes and the rules for moving data between them.
|
||||
|
||||
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
|
||||
The names given to the described entities can be found in the system tables, [system.storage_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
|
||||
|
||||
### Configuration {#table_engine-mergetree-multiple-volumes_configure}
|
||||
|
||||
@ -635,7 +635,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
|
||||
|
||||
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
|
||||
|
||||
The number of threads performing background moves of data parts can be changed by [background\_move\_pool\_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
|
||||
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
|
||||
|
||||
### Details {#details}
|
||||
|
||||
@ -654,7 +654,7 @@ In all these cases except for mutations and partition freezing, a part is stored
|
||||
Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones.
|
||||
|
||||
In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file.
|
||||
Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
|
||||
Data is never transferred from the last one and into the first one. One may use system tables [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
|
||||
|
||||
User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter/partition.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met.
|
||||
|
||||
|
@ -57,19 +57,19 @@ You can specify any existing ZooKeeper cluster and the system will use a directo
|
||||
|
||||
If ZooKeeper isn’t set in the config file, you can’t create replicated tables, and any existing replicated tables will be read-only.
|
||||
|
||||
ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max\_replica\_delay\_for\_distributed\_queries](../../../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) and [fallback\_to\_stale\_replicas\_for\_distributed\_queries](../../../operations/settings/settings.md#settings-fallback_to_stale_replicas_for_distributed_queries).
|
||||
ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max_replica_delay_for_distributed_queries](../../../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) and [fallback_to_stale_replicas_for_distributed_queries](../../../operations/settings/settings.md#settings-fallback_to_stale_replicas_for_distributed_queries).
|
||||
|
||||
For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it doesn’t create any problems. An entire ClickHouse cluster that uses a single ZooKeeper cluster for coordination handles a total of several hundred `INSERTs` per second. The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data.
|
||||
|
||||
For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn’t proven necessary on the Yandex.Metrica cluster (approximately 300 servers).
|
||||
|
||||
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
|
||||
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
|
||||
|
||||
By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option.
|
||||
|
||||
Each block of data is written atomically. The INSERT query is divided into blocks up to `max_insert_block_size = 1048576` rows. In other words, if the `INSERT` query has less than 1048576 rows, it is made atomically.
|
||||
|
||||
Data blocks are deduplicated. For multiple writes of the same data block (data blocks of the same size containing the same rows in the same order), the block is only written once. The reason for this is in case of network failures when the client application doesn’t know if the data was written to the DB, so the `INSERT` query can simply be repeated. It doesn’t matter which replica INSERTs were sent to with identical data. `INSERTs` are idempotent. Deduplication parameters are controlled by [merge\_tree](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-merge_tree) server settings.
|
||||
Data blocks are deduplicated. For multiple writes of the same data block (data blocks of the same size containing the same rows in the same order), the block is only written once. The reason for this is in case of network failures when the client application doesn’t know if the data was written to the DB, so the `INSERT` query can simply be repeated. It doesn’t matter which replica INSERTs were sent to with identical data. `INSERTs` are idempotent. Deduplication parameters are controlled by [merge_tree](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-merge_tree) server settings.
|
||||
|
||||
During replication, only the source data to insert is transferred over the network. Further data transformation (merging) is coordinated and performed on all the replicas in the same way. This minimizes network usage, which means that replication works well when replicas reside in different datacenters. (Note that duplicating data in different datacenters is the main goal of replication.)
|
||||
|
||||
@ -117,7 +117,9 @@ CREATE TABLE table_name
|
||||
|
||||
</details>
|
||||
|
||||
As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the ‘macros’ section of the configuration file. Example:
|
||||
As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the [macros](../../../operations/server-configuration-parameters/settings.md#macros) section of the configuration file.
|
||||
|
||||
Example:
|
||||
|
||||
``` xml
|
||||
<macros>
|
||||
@ -137,6 +139,9 @@ In this case, the path consists of the following parts:
|
||||
`table_name` is the name of the node for the table in ZooKeeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it doesn’t change after a RENAME query.
|
||||
*HINT*: you could add a database name in front of `table_name` as well. E.g. `db_name.table_name`
|
||||
|
||||
The two built-in substitutions `{database}` and `{table}` can be used; they expand into the database name and the table name respectively (unless these macros are defined in the `macros` section). So the ZooKeeper path can be specified as `'/clickhouse/tables/{layer}-{shard}/{database}/{table}'`.
|
||||
Be careful with table renames when using these built-in substitutions. The path in Zookeeper cannot be changed, and when the table is renamed, the macros will expand into a different path, the table will refer to a path that does not exist in Zookeeper, and will go into read-only mode.
|
||||
|
||||
The replica name identifies different replicas of the same table. You can use the server name for this, as in the example. The name only needs to be unique within each shard.
|
||||
|
||||
You can define the parameters explicitly instead of using substitutions. This might be convenient for testing and for configuring small clusters. However, you can’t use distributed DDL queries (`ON CLUSTER`) in this case.
|
||||
@ -217,6 +222,6 @@ If the data in ZooKeeper was lost or damaged, you can save data by moving it to
|
||||
|
||||
**See also**
|
||||
|
||||
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
|
||||
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->
|
||||
|
@ -97,13 +97,13 @@ Cluster names must not contain dots.
|
||||
|
||||
The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
|
||||
- `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn’t start. If you change the DNS record, restart the server.
|
||||
- `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http\_port.
|
||||
- `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http_port.
|
||||
- `user` – Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
|
||||
- `password` – The password for connecting to a remote server (not masked). Default value: empty string.
|
||||
- `secure` - Use ssl for connection, usually you also should define `port` = 9440. Server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and have correct certificates.
|
||||
- `compression` - Use data compression. Default value: true.
|
||||
|
||||
When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load\_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting.
|
||||
When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) – see the [load_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting.
|
||||
If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times.
|
||||
This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly.
|
||||
|
||||
@ -144,11 +144,11 @@ You should be concerned about the sharding scheme in the following cases:
|
||||
- Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
|
||||
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
|
||||
|
||||
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
|
||||
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
|
||||
|
||||
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
|
||||
|
||||
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
|
||||
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
@ -160,6 +160,6 @@ When the `max_parallel_replicas` option is enabled, query processing is parallel
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
|
||||
- [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
|
||||
- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) <!--hide-->
|
||||
|
@ -25,10 +25,10 @@ You may have multiple sections like this, for the number of tables being transmi
|
||||
**–file** – Path to the file with the table dump, or -, which refers to stdin.
|
||||
Only a single table can be retrieved from stdin.
|
||||
|
||||
The following parameters are optional: **–name**– Name of the table. If omitted, \_data is used.
|
||||
The following parameters are optional: **–name**– Name of the table. If omitted, _data is used.
|
||||
**–format** – Data format in the file. If omitted, TabSeparated is used.
|
||||
|
||||
One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named \_1, \_2, …
|
||||
One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, …
|
||||
**–structure** – The table structure in the format `UserID UInt64`, `URL String`. Defines the column names and types.
|
||||
|
||||
The files specified in ‘file’ will be parsed by the format specified in ‘format’, using the data types specified in ‘types’ or ‘structure’. The table will be uploaded to the server and accessible there as a temporary table with the name in ‘name’.
|
||||
|
@ -93,15 +93,15 @@ You cannot perform a `SELECT` query directly from the table. Instead, use one of
|
||||
|
||||
When creating a table, the following settings are applied:
|
||||
|
||||
- [join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls)
|
||||
- [max\_rows\_in\_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join)
|
||||
- [max\_bytes\_in\_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join)
|
||||
- [join\_overflow\_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode)
|
||||
- [join\_any\_take\_last\_row](../../../operations/settings/settings.md#settings-join_any_take_last_row)
|
||||
- [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls)
|
||||
- [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join)
|
||||
- [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join)
|
||||
- [join_overflow_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode)
|
||||
- [join_any_take_last_row](../../../operations/settings/settings.md#settings-join_any_take_last_row)
|
||||
|
||||
The `Join`-engine tables can’t be used in `GLOBAL JOIN` operations.
|
||||
|
||||
The `Join`-engine allows use [join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls) setting in the `CREATE TABLE` statement. And [SELECT](../../../sql-reference/statements/select/index.md) query allows use `join_use_nulls` too. If you have different `join_use_nulls` settings, you can get an error joining table. It depends on kind of JOIN. When you use [joinGet](../../../sql-reference/functions/other-functions.md#joinget) function, you have to use the same `join_use_nulls` setting in `CRATE TABLE` and `SELECT` statements.
|
||||
The `Join`-engine allows using the [join_use_nulls](../../../operations/settings/settings.md#join_use_nulls) setting in the `CREATE TABLE` statement. A [SELECT](../../../sql-reference/statements/select/index.md) query allows using `join_use_nulls` too. If the `join_use_nulls` settings differ, you can get an error when joining the table, depending on the kind of JOIN. When you use the [joinGet](../../../sql-reference/functions/other-functions.md#joinget) function, you have to use the same `join_use_nulls` setting in the `CREATE TABLE` and `SELECT` statements.
|
||||
|
||||
## Data Storage {#data-storage}
|
||||
|
||||
|
@ -33,7 +33,7 @@ The typical way to use the `Merge` engine is for working with a large number of
|
||||
|
||||
Example 2:
|
||||
|
||||
Let’s say you have a old table (WatchLog\_old) and decided to change partitioning without moving data to a new table (WatchLog\_new) and you need to see data from both tables.
|
||||
Let’s say you have an old table (WatchLog_old) and decided to change partitioning without moving data to a new table (WatchLog_new), and you need to see data from both tables.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64)
|
||||
|
@ -23,7 +23,7 @@ additional headers for getting a response from the server.
|
||||
respectively. For processing `POST` requests, the remote server must support
|
||||
[Chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding).
|
||||
|
||||
You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.
|
||||
You can limit the maximum number of HTTP GET redirect hops using the [max_http_get_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.
|
||||
|
||||
## Example {#example}
|
||||
|
||||
|
@ -40,7 +40,7 @@ Question candidates:
|
||||
- How to implement pivot (like in pandas)?
|
||||
- How to remove the default ClickHouse user through users.d?
|
||||
- Importing a MySQL dump to ClickHouse
|
||||
- Window function workarounds (row\_number, lag/lead, running diff/sum/average)
|
||||
- Window function workarounds (row_number, lag/lead, running diff/sum/average)
|
||||
##}
|
||||
|
||||
{## [Original article](https://clickhouse.tech/docs/en/faq) ##}
|
||||
|
@ -6,7 +6,7 @@ toc_priority: 20
|
||||
|
||||
# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings}
|
||||
|
||||
If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS\_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
|
||||
If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -7,7 +7,7 @@ toc_title: AMPLab Big Data Benchmark
|
||||
|
||||
See https://amplab.cs.berkeley.edu/benchmark/
|
||||
|
||||
Sign up for a free account at https://aws.amazon.com. It requires a credit card, email, and phone number. Get a new access key at https://console.aws.amazon.com/iam/home?nc2=h\_m\_sc\#security\_credential
|
||||
Sign up for a free account at https://aws.amazon.com. It requires a credit card, email, and phone number. Get a new access key at https://console.aws.amazon.com/iam/home?nc2=h_m_sc#security_credential
|
||||
|
||||
Run the following in the console:
|
||||
|
||||
|
@ -195,7 +195,7 @@ The data in this table uses 142 GB.
|
||||
|
||||
(Importing data directly from Postgres is also possible using `COPY ... TO PROGRAM`.)
|
||||
|
||||
Unfortunately, all the fields associated with the weather (precipitation…average\_wind\_speed) were filled with NULL. Because of this, we will remove them from the final data set.
|
||||
Unfortunately, all the fields associated with the weather (precipitation…average_wind_speed) were filled with NULL. Because of this, we will remove them from the final data set.
|
||||
|
||||
To start, we’ll create a table on a single server. Later we will make the table distributed.
|
||||
|
||||
|
@ -7,9 +7,9 @@ toc_title: Installation
|
||||
|
||||
## System Requirements {#system-requirements}
|
||||
|
||||
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86\_64, AArch64, or PowerPC64LE CPU architecture.
|
||||
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
|
||||
|
||||
Official pre-built binaries are typically compiled for x86\_64 and leverage SSE 4.2 instruction set, so unless otherwise stated usage of CPU that supports it becomes an additional system requirement. Here’s the command to check if current CPU has support for SSE 4.2:
|
||||
Official pre-built binaries are typically compiled for x86_64 and leverage SSE 4.2 instruction set, so unless otherwise stated usage of CPU that supports it becomes an additional system requirement. Here’s the command to check if current CPU has support for SSE 4.2:
|
||||
|
||||
``` bash
|
||||
$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported"
|
||||
|
@ -38,10 +38,10 @@ The queries are executed as a read-only user. It implies some limitations:
|
||||
|
||||
The following settings are also enforced:
|
||||
|
||||
- [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
|
||||
- [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
|
||||
- [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
|
||||
- [max\_execution\_time=60000](../operations/settings/query_complexity/#max-execution-time)
|
||||
- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
|
||||
- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
|
||||
- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
|
||||
- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time)
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
|
@ -11,10 +11,10 @@ In a “normal” row-oriented DBMS, data is stored in this order:
|
||||
|
||||
| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime |
|
||||
|-----|-------------|------------|--------------------|-----------|---------------------|
|
||||
| \#0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
|
||||
| \#1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
|
||||
| \#2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
|
||||
| \#N | … | … | … | … | … |
|
||||
| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
|
||||
| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
|
||||
| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
|
||||
| #N | … | … | … | … | … |
|
||||
|
||||
In other words, all the values related to a row are physically stored next to each other.
|
||||
|
||||
@ -22,7 +22,7 @@ Examples of a row-oriented DBMS are MySQL, Postgres, and MS SQL Server.
|
||||
|
||||
In a column-oriented DBMS, data is stored like this:
|
||||
|
||||
| Row: | \#0 | \#1 | \#2 | \#N |
|
||||
| Row: | #0 | #1 | #2 | #N |
|
||||
|-------------|---------------------|---------------------|---------------------|-----|
|
||||
| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
|
||||
| JavaEnable: | 1 | 0 | 1 | … |
|
||||
|
@ -43,6 +43,7 @@ The supported formats are:
|
||||
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
|
||||
| [PrettySpace](#prettyspace) | ✗ | ✔ |
|
||||
| [Protobuf](#protobuf) | ✔ | ✔ |
|
||||
| [ProtobufSingle](#protobufsingle) | ✔ | ✔ |
|
||||
| [Avro](#data-format-avro) | ✔ | ✔ |
|
||||
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
|
||||
| [Parquet](#data-format-parquet) | ✔ | ✔ |
|
||||
@ -55,6 +56,7 @@ The supported formats are:
|
||||
| [Null](#null) | ✗ | ✔ |
|
||||
| [XML](#xml) | ✗ | ✔ |
|
||||
| [CapnProto](#capnproto) | ✔ | ✗ |
|
||||
| [LineAsString](#lineasstring) | ✔ | ✗ |
|
||||
|
||||
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.
|
||||
|
||||
@ -210,7 +212,7 @@ Setting `format_template_resultset` specifies the path to file, which contains a
|
||||
- `min` is the row with minimum values in `format_template_row` format (when extremes are set to 1)
|
||||
- `max` is the row with maximum values in `format_template_row` format (when extremes are set to 1)
|
||||
- `rows` is the total number of output rows
|
||||
- `rows_before_limit` is the minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT. If the query contains GROUP BY, rows\_before\_limit\_at\_least is the exact number of rows there would have been without a LIMIT.
|
||||
- `rows_before_limit` is the minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT. If the query contains GROUP BY, rows_before_limit_at_least is the exact number of rows there would have been without a LIMIT.
|
||||
- `time` is the request execution time in seconds
|
||||
- `rows_read` is the number of rows that have been read
|
||||
- `bytes_read` is the number of bytes (uncompressed) that have been read
|
||||
@ -361,21 +363,21 @@ Parsing allows the presence of the additional field `tskv` without the equal sig
|
||||
|
||||
Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)).
|
||||
|
||||
When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format\_csv\_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost).
|
||||
When formatting, rows are enclosed in double-quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double-quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double-quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost).
|
||||
|
||||
``` bash
|
||||
$ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv
|
||||
```
|
||||
|
||||
\*By default, the delimiter is `,`. See the [format\_csv\_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) setting for more information.
|
||||
\*By default, the delimiter is `,`. See the [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) setting for more information.
|
||||
|
||||
When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported.
|
||||
|
||||
Empty unquoted input values are replaced with default values for the respective columns, if
|
||||
[input\_format\_defaults\_for\_omitted\_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields)
|
||||
[input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields)
|
||||
is enabled.
|
||||
|
||||
`NULL` is formatted as `\N` or `NULL` or an empty unquoted string (see settings [input\_format\_csv\_unquoted\_null\_literal\_as\_null](../operations/settings/settings.md#settings-input_format_csv_unquoted_null_literal_as_null) and [input\_format\_defaults\_for\_omitted\_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields)).
|
||||
`NULL` is formatted as `\N` or `NULL` or an empty unquoted string (see settings [input_format_csv_unquoted_null_literal_as_null](../operations/settings/settings.md#settings-input_format_csv_unquoted_null_literal_as_null) and [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields)).
|
||||
|
||||
The CSV format supports the output of totals and extremes the same way as `TabSeparated`.
|
||||
|
||||
@ -439,12 +441,12 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA
|
||||
}
|
||||
```
|
||||
|
||||
The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character <20> so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double-quotes by default. To remove the quotes, you can set the configuration parameter [output\_format\_json\_quote\_64bit\_integers](../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) to 0.
|
||||
The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character <20> so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double-quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) to 0.
|
||||
|
||||
`rows` – The total number of output rows.
|
||||
|
||||
`rows_before_limit_at_least` – The minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT.
|
||||
If the query contains GROUP BY, rows\_before\_limit\_at\_least is the exact number of rows there would have been without a LIMIT.
|
||||
If the query contains GROUP BY, rows_before_limit_at_least is the exact number of rows there would have been without a LIMIT.
|
||||
|
||||
`totals` – Total values (when using WITH TOTALS).
|
||||
|
||||
@ -452,7 +454,7 @@ If the query contains GROUP BY, rows\_before\_limit\_at\_least is the exact numb
|
||||
|
||||
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
|
||||
|
||||
ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output\_format\_json\_quote\_denormals](../operations/settings/settings.md#settings-output_format_json_quote_denormals) to 1.
|
||||
ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](../operations/settings/settings.md#settings-output_format_json_quote_denormals) to 1.
|
||||
|
||||
See also the [JSONEachRow](#jsoneachrow) format.
|
||||
|
||||
@ -633,7 +635,7 @@ ClickHouse ignores spaces between elements and commas after the objects. You can
|
||||
|
||||
ClickHouse substitutes omitted values with the default values for the corresponding [data types](../sql-reference/data-types/index.md).
|
||||
|
||||
If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input\_format\_defaults\_for\_omitted\_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting.
|
||||
If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) setting.
|
||||
|
||||
Consider the following table:
|
||||
|
||||
@ -676,7 +678,7 @@ Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 seque
|
||||
|
||||
### Usage of Nested Structures {#jsoneachrow-nested}
|
||||
|
||||
If you have a table with [Nested](../sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input\_format\_import\_nested\_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting.
|
||||
If you have a table with [Nested](../sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting.
|
||||
|
||||
For example, consider the following table:
|
||||
|
||||
@ -690,7 +692,7 @@ As you can see in the `Nested` data type description, ClickHouse treats each com
|
||||
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]}
|
||||
```
|
||||
|
||||
To insert data as a hierarchical JSON object, set [input\_format\_import\_nested\_json=1](../operations/settings/settings.md#settings-input_format_import_nested_json).
|
||||
To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](../operations/settings/settings.md#settings-input_format_import_nested_json).
|
||||
|
||||
``` json
|
||||
{
|
||||
@ -872,7 +874,7 @@ The minimum set of characters that you need to escape when passing data in Value
|
||||
|
||||
This is the format that is used in `INSERT INTO t VALUES ...`, but you can also use it for formatting query results.
|
||||
|
||||
See also: [input\_format\_values\_interpret\_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) and [input\_format\_values\_deduce\_templates\_of\_expressions](../operations/settings/settings.md#settings-input_format_values_deduce_templates_of_expressions) settings.
|
||||
See also: [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) and [input_format_values_deduce_templates_of_expressions](../operations/settings/settings.md#settings-input_format_values_deduce_templates_of_expressions) settings.
|
||||
|
||||
## Vertical {#vertical}
|
||||
|
||||
@ -1075,6 +1077,10 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format
|
||||
This means that every message must be preceded by its length, written as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints).
|
||||
See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages).
|
||||
|
||||
## ProtobufSingle {#protobufsingle}
|
||||
|
||||
Same as [Protobuf](#protobuf) but for storing/parsing a single Protobuf message without length delimiters.
|
||||
|
||||
## Avro {#data-format-avro}
|
||||
|
||||
[Apache Avro](https://avro.apache.org/) is a row-oriented data serialization framework developed within Apache’s Hadoop project.
|
||||
@ -1135,7 +1141,7 @@ Column names must:
|
||||
- start with `[A-Za-z_]`
|
||||
- subsequently contain only `[A-Za-z0-9_]`
|
||||
|
||||
Output Avro file compression and sync interval can be configured with [output\_format\_avro\_codec](../operations/settings/settings.md#settings-output_format_avro_codec) and [output\_format\_avro\_sync\_interval](../operations/settings/settings.md#settings-output_format_avro_sync_interval) respectively.
|
||||
Output Avro file compression and sync interval can be configured with [output_format_avro_codec](../operations/settings/settings.md#settings-output_format_avro_codec) and [output_format_avro_sync_interval](../operations/settings/settings.md#settings-output_format_avro_sync_interval) respectively.
|
||||
|
||||
## AvroConfluent {#data-format-avro-confluent}
|
||||
|
||||
@ -1145,7 +1151,7 @@ Each Avro message embeds a schema id that can be resolved to the actual schema w
|
||||
|
||||
Schemas are cached once resolved.
|
||||
|
||||
Schema Registry URL is configured with [format\_avro\_schema\_registry\_url](../operations/settings/settings.md#format_avro_schema_registry_url).
|
||||
Schema Registry URL is configured with [format_avro_schema_registry_url](../operations/settings/settings.md#format_avro_schema_registry_url).
|
||||
|
||||
### Data Types Matching {#data_types-matching-1}
|
||||
|
||||
@ -1298,15 +1304,38 @@ can contain an absolute path or a path relative to the current directory on the
|
||||
If you use the client in the [batch mode](../interfaces/cli.md#cli_usage), the path to the schema must be relative due to security reasons.
|
||||
|
||||
If you input or output data via the [HTTP interface](../interfaces/http.md) the file name specified in the format schema
|
||||
should be located in the directory specified in [format\_schema\_path](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-format_schema_path)
|
||||
should be located in the directory specified in [format_schema_path](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-format_schema_path)
|
||||
in the server configuration.
|
||||
|
||||
## Skipping Errors {#skippingerrors}
|
||||
|
||||
Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input\_format\_allow\_errors\_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) and
|
||||
[input\_format\_allow\_errors\_ratio](../operations/settings/settings.md#settings-input_format_allow_errors_ratio) settings.
|
||||
Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip a broken row if a parsing error occurs and continue parsing from the beginning of the next row. See [input_format_allow_errors_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) and
|
||||
[input_format_allow_errors_ratio](../operations/settings/settings.md#settings-input_format_allow_errors_ratio) settings.
|
||||
Limitations:
|
||||
- In case of parsing error `JSONEachRow` skips all data until the new line (or EOF), so rows must be delimited by `\n` to count errors correctly.
|
||||
- `Template` and `CustomSeparated` use the delimiter after the last column and the delimiter between rows to find the beginning of the next row, so skipping errors works only if at least one of them is not empty.
|
||||
|
||||
## LineAsString {#lineasstring}
|
||||
|
||||
In this format, a sequence of string objects separated by a newline character is interpreted as a single value. This format can only be parsed for a table with a single field of type [String](../sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](../sql-reference/statements/create/table.md#default) or [MATERIALIZED](../sql-reference/statements/create/table.md#materialized), or omitted.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
DROP TABLE IF EXISTS line_as_string;
|
||||
CREATE TABLE line_as_string (field String) ENGINE = Memory;
|
||||
INSERT INTO line_as_string FORMAT LineAsString "I love apple", "I love banana", "I love orange";
|
||||
SELECT * FROM line_as_string;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─field─────────────────────────────────────────────┐
|
||||
│ "I love apple", "I love banana", "I love orange"; │
|
||||
└───────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/interfaces/formats/) <!--hide-->
|
||||
|
@ -9,7 +9,7 @@ The HTTP interface lets you use ClickHouse on any platform from any programming
|
||||
|
||||
By default, clickhouse-server listens for HTTP on port 8123 (this can be changed in the config).
|
||||
|
||||
If you make a GET / request without parameters, it returns 200 response code and the string which defined in [http\_server\_default\_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response) default value “Ok.” (with a line feed at the end)
|
||||
If you make a GET / request without parameters, it returns a 200 response code and the string defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response), the default value of which is “Ok.” (with a line feed at the end).
|
||||
|
||||
``` bash
|
||||
$ curl 'http://localhost:8123/'
|
||||
@ -148,12 +148,12 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
|
||||
|
||||
For successful requests that don’t return a data table, an empty response body is returned.
|
||||
|
||||
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http\_native\_compression\_disable\_checksumming\_on\_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
|
||||
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
|
||||
|
||||
If you specified `compress=1` in the URL, the server compresses the data it sends you.
|
||||
If you specified `decompress=1` in the URL, the server decompresses the same data that you pass in the `POST` method.
|
||||
|
||||
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, you must append `Accept-Encoding: compression_method`. ClickHouse supports `gzip`, `br`, and `deflate` [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens). To enable HTTP compression, you must use the ClickHouse [enable\_http\_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the data compression level in the [http\_zlib\_compression\_level](#settings-http_zlib_compression_level) setting for all the compression methods.
|
||||
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, you must append `Accept-Encoding: compression_method`. ClickHouse supports `gzip`, `br`, and `deflate` [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens). To enable HTTP compression, you must use the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the data compression level in the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting for all the compression methods.
|
||||
|
||||
You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed.
|
||||
|
||||
@ -215,7 +215,7 @@ $ echo 'SELECT 1' | curl -H 'X-ClickHouse-User: user' -H 'X-ClickHouse-Key: pass
|
||||
```
|
||||
|
||||
If the user name is not specified, the `default` name is used. If the password is not specified, the empty password is used.
|
||||
You can also use the URL parameters to specify any settings for processing a single query or entire profiles of settings. Example:http://localhost:8123/?profile=web&max\_rows\_to\_read=1000000000&query=SELECT+1
|
||||
You can also use the URL parameters to specify any settings for processing a single query or entire profiles of settings. Example: http://localhost:8123/?profile=web&max_rows_to_read=1000000000&query=SELECT+1
|
||||
|
||||
For more information, see the [Settings](../operations/settings/index.md) section.
|
||||
|
||||
@ -237,7 +237,7 @@ For information about other parameters, see the section “SET”.
|
||||
|
||||
Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to add the `session_id` GET parameter to the request. You can use any string as the session ID. By default, the session is terminated after 60 seconds of inactivity. To change this timeout, modify the `default_session_timeout` setting in the server configuration, or add the `session_timeout` GET parameter to the request. To check the session status, use the `session_check=1` parameter. Only one query at a time can be executed within a single session.
|
||||
|
||||
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send\_progress\_in\_http\_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
|
||||
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
|
||||
|
||||
``` text
|
||||
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
|
||||
@ -254,9 +254,9 @@ Possible header fields:
|
||||
- `written_bytes` — Volume of data written in bytes.
|
||||
|
||||
Running requests don’t stop automatically if the HTTP connection is lost. Parsing and data formatting are performed on the server-side, and using the network might be ineffective.
|
||||
The optional ‘query\_id’ parameter can be passed as the query ID (any string). For more information, see the section “Settings, replace\_running\_query”.
|
||||
The optional ‘query_id’ parameter can be passed as the query ID (any string). For more information, see the section “Settings, replace_running_query”.
|
||||
|
||||
The optional ‘quota\_key’ parameter can be passed as the quota key (any string). For more information, see the section “Quotas”.
|
||||
The optional ‘quota_key’ parameter can be passed as the quota key (any string). For more information, see the section “Quotas”.
|
||||
|
||||
The HTTP interface allows passing external data (external temporary tables) for querying. For more information, see the section “External data for query processing”.
|
||||
|
||||
@ -377,7 +377,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
|
||||
- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
|
||||
|
||||
- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
|
||||
`type` currently supports three types: [predefined\_query\_handler](#predefined_query_handler), [dynamic\_query\_handler](#dynamic_query_handler), [static](#static).
|
||||
`type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).
|
||||
|
||||
- `query` — use with `predefined_query_handler` type, executes query when the handler is called.
|
||||
|
||||
@ -391,13 +391,13 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
|
||||
|
||||
Next are the configuration methods for different `type`.
|
||||
|
||||
### predefined\_query\_handler {#predefined_query_handler}
|
||||
### predefined_query_handler {#predefined_query_handler}
|
||||
|
||||
`predefined_query_handler` supports setting `Settings` and `query_params` values. You can configure `query` in the type of `predefined_query_handler`.
|
||||
|
||||
The `query` value is a predefined query of `predefined_query_handler`; ClickHouse executes it when an HTTP request is matched and returns the result of the query. It is a required setting.
|
||||
|
||||
The following example defines the values of [max\_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
|
||||
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
|
||||
|
||||
Example:
|
||||
|
||||
@ -428,13 +428,13 @@ max_alter_threads 2
|
||||
!!! note "caution"
|
||||
One `predefined_query_handler` supports only one `query` of an insert type.
|
||||
|
||||
### dynamic\_query\_handler {#dynamic_query_handler}
|
||||
### dynamic_query_handler {#dynamic_query_handler}
|
||||
|
||||
In `dynamic_query_handler`, the query is written in the form of param of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`.
|
||||
|
||||
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
|
||||
|
||||
To experiment with this functionality, the example defines the values of [max\_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and `queries` whether the settings were set successfully.
|
||||
To experiment with this functionality, the example defines the values of the [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries whether the settings were set successfully.
|
||||
|
||||
Example:
|
||||
|
||||
@ -459,7 +459,7 @@ max_alter_threads 2
|
||||
|
||||
### static {#static}
|
||||
|
||||
`static` can return [content\_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and `response_content`. `response_content` can return the specified content.
|
||||
`static` can return [content_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and `response_content`. `response_content` can return the specified content.
|
||||
|
||||
Example:
|
||||
|
||||
|
@ -5,7 +5,7 @@ toc_title: MySQL Interface
|
||||
|
||||
# MySQL Interface {#mysql-interface}
|
||||
|
||||
ClickHouse supports MySQL wire protocol. It can be enabled by [mysql\_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting in configuration file:
|
||||
ClickHouse supports MySQL wire protocol. It can be enabled by [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting in configuration file:
|
||||
|
||||
``` xml
|
||||
<mysql_port>9004</mysql_port>
|
||||
|
@ -9,7 +9,7 @@ toc_title: Client Libraries
|
||||
Yandex does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality.
|
||||
|
||||
- Python
|
||||
- [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm)
|
||||
- [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)
|
||||
- [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
|
||||
- [clickhouse-client](https://github.com/yurial/clickhouse-client)
|
||||
- [aiochclient](https://github.com/maximdanilchenko/aiochclient)
|
||||
@ -46,7 +46,7 @@ toc_title: Client Libraries
|
||||
- [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
|
||||
- Kotlin
|
||||
- [AORM](https://github.com/TanVD/AORM)
|
||||
- C\#
|
||||
- C#
|
||||
- [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient)
|
||||
- [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
|
||||
- [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)
|
||||
|
22
docs/en/interfaces/third-party/integrations.md
vendored
22
docs/en/interfaces/third-party/integrations.md
vendored
@ -17,15 +17,15 @@ toc_title: Integrations
|
||||
- [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader)
|
||||
- [horgh-replicator](https://github.com/larsnovikov/horgh-replicator)
|
||||
- [PostgreSQL](https://www.postgresql.org)
|
||||
- [clickhousedb\_fdw](https://github.com/Percona-Lab/clickhousedb_fdw)
|
||||
- [infi.clickhouse\_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (uses [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm))
|
||||
- [clickhousedb_fdw](https://github.com/Percona-Lab/clickhousedb_fdw)
|
||||
- [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm))
|
||||
- [pg2ch](https://github.com/mkabilov/pg2ch)
|
||||
- [clickhouse\_fdw](https://github.com/adjust/clickhouse_fdw)
|
||||
- [clickhouse_fdw](https://github.com/adjust/clickhouse_fdw)
|
||||
- [MSSQL](https://en.wikipedia.org/wiki/Microsoft_SQL_Server)
|
||||
- [ClickHouseMigrator](https://github.com/zlzforever/ClickHouseMigrator)
|
||||
- Message queues
|
||||
- [Kafka](https://kafka.apache.org)
|
||||
- [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/))
|
||||
- [clickhouse_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/))
|
||||
- [stream-loader-clickhouse](https://github.com/adform/stream-loader)
|
||||
- Stream processing
|
||||
- [Flink](https://flink.apache.org)
|
||||
@ -49,12 +49,12 @@ toc_title: Integrations
|
||||
- [Grafana](https://grafana.com/)
|
||||
- [clickhouse-grafana](https://github.com/Vertamedia/clickhouse-grafana)
|
||||
- [Prometheus](https://prometheus.io/)
|
||||
- [clickhouse\_exporter](https://github.com/f1yegor/clickhouse_exporter)
|
||||
- [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter)
|
||||
- [PromHouse](https://github.com/Percona-Lab/PromHouse)
|
||||
- [clickhouse\_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/))
|
||||
- [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/))
|
||||
- [Nagios](https://www.nagios.org/)
|
||||
- [check\_clickhouse](https://github.com/exogroup/check_clickhouse/)
|
||||
- [check\_clickhouse.py](https://github.com/innogames/igmonplugins/blob/master/src/check_clickhouse.py)
|
||||
- [check_clickhouse](https://github.com/exogroup/check_clickhouse/)
|
||||
- [check_clickhouse.py](https://github.com/innogames/igmonplugins/blob/master/src/check_clickhouse.py)
|
||||
- [Zabbix](https://www.zabbix.com)
|
||||
- [clickhouse-zabbix-template](https://github.com/Altinity/clickhouse-zabbix-template)
|
||||
- [Sematext](https://sematext.com/)
|
||||
@ -74,7 +74,7 @@ toc_title: Integrations
|
||||
|
||||
- Python
|
||||
- [SQLAlchemy](https://www.sqlalchemy.org)
|
||||
- [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm))
|
||||
- [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm))
|
||||
- [pandas](https://pandas.pydata.org)
|
||||
- [pandahouse](https://github.com/kszucs/pandahouse)
|
||||
- PHP
|
||||
@ -89,7 +89,7 @@ toc_title: Integrations
|
||||
- Scala
|
||||
- [Akka](https://akka.io)
|
||||
- [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
|
||||
- C\#
|
||||
- C#
|
||||
- [ADO.NET](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/ado-net-overview)
|
||||
- [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
|
||||
- [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)
|
||||
@ -97,7 +97,7 @@ toc_title: Integrations
|
||||
- [ClickHouse.Net.Migrations](https://github.com/ilyabreev/ClickHouse.Net.Migrations)
|
||||
- Elixir
|
||||
- [Ecto](https://github.com/elixir-ecto/ecto)
|
||||
- [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto)
|
||||
- [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto)
|
||||
- Ruby
|
||||
- [Ruby on Rails](https://rubyonrails.org/)
|
||||
- [activecube](https://github.com/bitquery/activecube)
|
||||
|
@ -60,6 +60,7 @@ toc_title: Adopters
|
||||
| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
|
||||
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
|
||||
| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
|
||||
| <a href="https://mellodesign.ru/" class="favicon">Mello</a> | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) |
|
||||
| <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
|
||||
| <a href="https://www.mindsdb.com/" class="favicon">MindsDB</a> | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |
|
||||
| <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
|
||||
@ -68,6 +69,7 @@ toc_title: Adopters
|
||||
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
|
||||
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
|
||||
| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
|
||||
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) |
|
||||
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
|
||||
| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
|
||||
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
|
||||
@ -86,6 +88,7 @@ toc_title: Adopters
|
||||
| <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
|
||||
| <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
|
||||
| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
|
||||
| <a href="https://www.staffcop.ru/" class="favicon">Staffcop</a> | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) |
|
||||
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
|
||||
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
|
||||
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
|
||||
|
@ -138,10 +138,10 @@ Management queries:
|
||||
|
||||
- Set up a directory for configuration storage.
|
||||
|
||||
ClickHouse stores access entity configurations in the folder set in the [access\_control\_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
|
||||
ClickHouse stores access entity configurations in the folder set in the [access_control_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
|
||||
|
||||
- Enable SQL-driven access control and account management for at least one user account.
|
||||
|
||||
By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access\_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1.
|
||||
By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/access_rights/) <!--hide-->
|
||||
|
@ -20,7 +20,7 @@ Some settings specified in the main configuration file can be overridden in othe
|
||||
|
||||
## Substitution {#substitution}
|
||||
|
||||
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).
|
||||
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).
|
||||
|
||||
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
|
||||
|
||||
|
@ -33,7 +33,7 @@ ClickHouse collects:
|
||||
- Different metrics of how the server uses computational resources.
|
||||
- Common statistics on query processing.
|
||||
|
||||
You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous\_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables.
|
||||
You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables.
|
||||
|
||||
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html).
|
||||
|
||||
@ -41,4 +41,4 @@ You can configure ClickHouse to export metrics to [Prometheus](https://prometheu
|
||||
|
||||
Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`.
|
||||
|
||||
To monitor servers in a cluster configuration, you should set the [max\_replica\_delay\_for\_distributed\_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap.
|
||||
To monitor servers in a cluster configuration, you should set the [max_replica_delay_for_distributed_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap.
|
||||
|
@ -9,11 +9,11 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p
|
||||
|
||||
To use profiler:
|
||||
|
||||
- Setup the [trace\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
|
||||
- Set up the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
|
||||
|
||||
This section configures the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn’t clean up the table and all the stored virtual memory address may become invalid.
|
||||
This section configures the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn’t clean up the table and all the stored virtual memory addresses may become invalid.
|
||||
|
||||
- Setup the [query\_profiler\_cpu\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
|
||||
- Set up the [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
|
||||
|
||||
These settings allow you to configure profiler timers. As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query.
|
||||
|
||||
@ -23,7 +23,7 @@ To analyze the `trace_log` system table:
|
||||
|
||||
- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages).
|
||||
|
||||
- Allow introspection functions by the [allow\_introspection\_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting.
|
||||
- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting.
|
||||
|
||||
For security reasons, introspection functions are disabled by default.
|
||||
|
||||
|
@ -7,7 +7,7 @@ toc_title: Requirements
|
||||
|
||||
## CPU {#cpu}
|
||||
|
||||
For installation from prebuilt deb packages, use a CPU with x86\_64 architecture and support for SSE 4.2 instructions. To run ClickHouse with processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
|
||||
For installation from prebuilt deb packages, use a CPU with x86_64 architecture and support for SSE 4.2 instructions. To run ClickHouse with processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
|
||||
|
||||
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.
|
||||
|
||||
|
@ -5,7 +5,7 @@ toc_title: Server Settings
|
||||
|
||||
# Server Settings {#server-settings}
|
||||
|
||||
## builtin\_dictionaries\_reload\_interval {#builtin-dictionaries-reload-interval}
|
||||
## builtin_dictionaries_reload_interval {#builtin-dictionaries-reload-interval}
|
||||
|
||||
The interval in seconds before reloading built-in dictionaries.
|
||||
|
||||
@ -92,7 +92,7 @@ Configures soft limit for core dump file size, one gigabyte by default.
|
||||
(Hard limit is configured via system tools)
|
||||
|
||||
|
||||
## default\_database {#default-database}
|
||||
## default_database {#default-database}
|
||||
|
||||
The default database.
|
||||
|
||||
@ -104,7 +104,7 @@ To get a list of databases, use the [SHOW DATABASES](../../sql-reference/stateme
|
||||
<default_database>default</default_database>
|
||||
```
|
||||
|
||||
## default\_profile {#default-profile}
|
||||
## default_profile {#default-profile}
|
||||
|
||||
Default settings profile.
|
||||
|
||||
@ -116,7 +116,7 @@ Settings profiles are located in the file specified in the parameter `user_confi
|
||||
<default_profile>default</default_profile>
|
||||
```
|
||||
|
||||
## dictionaries\_config {#server_configuration_parameters-dictionaries_config}
|
||||
## dictionaries_config {#server_configuration_parameters-dictionaries_config}
|
||||
|
||||
The path to the config file for external dictionaries.
|
||||
|
||||
@ -133,7 +133,7 @@ See also “[External dictionaries](../../sql-reference/dictionaries/external-di
|
||||
<dictionaries_config>*_dictionary.xml</dictionaries_config>
|
||||
```
|
||||
|
||||
## dictionaries\_lazy\_load {#server_configuration_parameters-dictionaries_lazy_load}
|
||||
## dictionaries_lazy_load {#server_configuration_parameters-dictionaries_lazy_load}
|
||||
|
||||
Lazy loading of dictionaries.
|
||||
|
||||
@ -149,7 +149,7 @@ The default is `true`.
|
||||
<dictionaries_lazy_load>true</dictionaries_lazy_load>
|
||||
```
|
||||
|
||||
## format\_schema\_path {#server_configuration_parameters-format_schema_path}
|
||||
## format_schema_path {#server_configuration_parameters-format_schema_path}
|
||||
|
||||
The path to the directory with the schemes for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format.
|
||||
|
||||
@ -170,11 +170,11 @@ Settings:
|
||||
- port – The port on the Graphite server.
|
||||
- interval – The interval for sending, in seconds.
|
||||
- timeout – The timeout for sending data, in seconds.
|
||||
- root\_path – Prefix for keys.
|
||||
- root_path – Prefix for keys.
|
||||
- metrics – Sending data from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
|
||||
- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
|
||||
- events\_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
|
||||
- asynchronous\_metrics – Sending data from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
|
||||
- events_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
|
||||
- asynchronous_metrics – Sending data from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
|
||||
|
||||
You can configure multiple `<graphite>` clauses. For instance, you can use this for sending different data at different intervals.
|
||||
|
||||
@ -194,7 +194,7 @@ You can configure multiple `<graphite>` clauses. For instance, you can use this
|
||||
</graphite>
|
||||
```
|
||||
|
||||
## graphite\_rollup {#server_configuration_parameters-graphite-rollup}
|
||||
## graphite_rollup {#server_configuration_parameters-graphite-rollup}
|
||||
|
||||
Settings for thinning data for Graphite.
|
||||
|
||||
@ -222,7 +222,7 @@ For more details, see [GraphiteMergeTree](../../engines/table-engines/mergetree-
|
||||
</graphite_rollup_example>
|
||||
```
|
||||
|
||||
## http\_port/https\_port {#http-porthttps-port}
|
||||
## http_port/https_port {#http-porthttps-port}
|
||||
|
||||
The port for connecting to the server over HTTP(s).
|
||||
|
||||
@ -236,7 +236,7 @@ If `http_port` is specified, the OpenSSL configuration is ignored even if it is
|
||||
<https_port>9999</https_port>
|
||||
```
|
||||
|
||||
## http\_server\_default\_response {#server_configuration_parameters-http_server_default_response}
|
||||
## http_server_default_response {#server_configuration_parameters-http_server_default_response}
|
||||
|
||||
The page that is shown by default when you access the ClickHouse HTTP(s) server.
|
||||
The default value is “Ok.” (with a line feed at the end)
|
||||
@ -251,7 +251,7 @@ Opens `https://tabix.io/` when accessing `http://localhost: http_port`.
|
||||
</http_server_default_response>
|
||||
```
|
||||
|
||||
## include\_from {#server_configuration_parameters-include_from}
|
||||
## include_from {#server_configuration_parameters-include_from}
|
||||
|
||||
The path to the file with substitutions.
|
||||
|
||||
@ -263,7 +263,7 @@ For more information, see the section “[Configuration files](../../operations/
|
||||
<include_from>/etc/metrica.xml</include_from>
|
||||
```
|
||||
|
||||
## interserver\_http\_port {#interserver-http-port}
|
||||
## interserver_http_port {#interserver-http-port}
|
||||
|
||||
Port for exchanging data between ClickHouse servers.
|
||||
|
||||
@ -273,7 +273,7 @@ Port for exchanging data between ClickHouse servers.
|
||||
<interserver_http_port>9009</interserver_http_port>
|
||||
```
|
||||
|
||||
## interserver\_http\_host {#interserver-http-host}
|
||||
## interserver_http_host {#interserver-http-host}
|
||||
|
||||
The hostname that can be used by other servers to access this server.
|
||||
|
||||
@ -287,7 +287,7 @@ Useful for breaking away from a specific network interface.
|
||||
<interserver_http_host>example.yandex.ru</interserver_http_host>
|
||||
```
|
||||
|
||||
## interserver\_http\_credentials {#server-settings-interserver-http-credentials}
|
||||
## interserver_http_credentials {#server-settings-interserver-http-credentials}
|
||||
|
||||
The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster.
|
||||
By default, the authentication is not used.
|
||||
@ -306,7 +306,7 @@ This section contains the following parameters:
|
||||
</interserver_http_credentials>
|
||||
```
|
||||
|
||||
## keep\_alive\_timeout {#keep-alive-timeout}
|
||||
## keep_alive_timeout {#keep-alive-timeout}
|
||||
|
||||
The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 3 seconds.
|
||||
|
||||
@ -316,7 +316,7 @@ The number of seconds that ClickHouse waits for incoming requests before closing
|
||||
<keep_alive_timeout>3</keep_alive_timeout>
|
||||
```
|
||||
|
||||
## listen\_host {#server_configuration_parameters-listen_host}
|
||||
## listen_host {#server_configuration_parameters-listen_host}
|
||||
|
||||
Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
|
||||
|
||||
@ -367,25 +367,25 @@ Writing to the syslog is also supported. Config example:
|
||||
|
||||
Keys for syslog:
|
||||
|
||||
- use\_syslog — Required setting if you want to write to the syslog.
|
||||
- use_syslog — Required setting if you want to write to the syslog.
|
||||
- address — The host\[:port\] of syslogd. If omitted, the local daemon is used.
|
||||
- hostname — Optional. The name of the host that logs are sent from.
|
||||
- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the “LOG\_” prefix: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`, and so on).
|
||||
- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the “LOG_” prefix: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`, and so on).
|
||||
Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON` otherwise.
|
||||
- format – Message format. Possible values: `bsd` and `syslog`.
|
||||
|
||||
## send\_crash\_reports {#server_configuration_parameters-logger}
|
||||
## send_crash_reports {#server_configuration_parameters-logger}
|
||||
|
||||
Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io).
|
||||
Enabling it, especially in pre-production environments, is greatly appreciated.
|
||||
Enabling it, especially in pre-production environments, is highly appreciated.
|
||||
|
||||
The server will need an access to public Internet via IPv4 (at the time of writing IPv6 is not supported by Sentry) for this feature to be functioning properly.
|
||||
The server will need access to the public Internet via IPv4 (at the time of writing IPv6 is not supported by Sentry) for this feature to function properly.
|
||||
|
||||
Keys:
|
||||
|
||||
- `enabled` – Boolean flag to enable the feature, `false` by default. Set to `true` to allow sending crash reports.
|
||||
- `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax.
|
||||
- `anonymize` - Avoid attaching the server hostname to crash report.
|
||||
- `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either a separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax.
|
||||
- `anonymize` - Avoid attaching the server hostname to the crash report.
|
||||
- `http_proxy` - Configure HTTP proxy for sending crash reports.
|
||||
- `debug` - Sets the Sentry client into debug mode.
|
||||
- `tmp_path` - Filesystem path for temporary crash report state.
|
||||
@ -412,7 +412,7 @@ For more information, see the section “[Creating replicated tables](../../engi
|
||||
<macros incl="macros" optional="true" />
|
||||
```
|
||||
|
||||
## mark\_cache\_size {#server-mark-cache-size}
|
||||
## mark_cache_size {#server-mark-cache-size}
|
||||
|
||||
Approximate size (in bytes) of the cache of marks used by table engines of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family.
|
||||
|
||||
@ -424,7 +424,7 @@ The cache is shared for the server and memory is allocated as needed. The cache
|
||||
<mark_cache_size>5368709120</mark_cache_size>
|
||||
```
|
||||
|
||||
## max\_server\_memory\_usage {#max_server_memory_usage}
|
||||
## max_server_memory_usage {#max_server_memory_usage}
|
||||
|
||||
Limits total RAM usage by the ClickHouse server.
|
||||
|
||||
@ -441,7 +441,7 @@ The default `max_server_memory_usage` value is calculated as `memory_amount * ma
|
||||
|
||||
**See also**
|
||||
|
||||
- [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
|
||||
- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
|
||||
- [max_server_memory_usage_to_ram_ratio](#max_server_memory_usage_to_ram_ratio)
|
||||
|
||||
## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio}
|
||||
@ -469,7 +469,7 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa
|
||||
|
||||
- [max_server_memory_usage](#max_server_memory_usage)
|
||||
|
||||
## max\_concurrent\_queries {#max-concurrent-queries}
|
||||
## max_concurrent_queries {#max-concurrent-queries}
|
||||
|
||||
The maximum number of simultaneously processed requests.
|
||||
|
||||
@ -479,7 +479,7 @@ The maximum number of simultaneously processed requests.
|
||||
<max_concurrent_queries>100</max_concurrent_queries>
|
||||
```
|
||||
|
||||
## max\_connections {#max-connections}
|
||||
## max_connections {#max-connections}
|
||||
|
||||
The maximum number of inbound connections.
|
||||
|
||||
@ -489,7 +489,7 @@ The maximum number of inbound connections.
|
||||
<max_connections>4096</max_connections>
|
||||
```
|
||||
|
||||
## max\_open\_files {#max-open-files}
|
||||
## max_open_files {#max-open-files}
|
||||
|
||||
The maximum number of open files.
|
||||
|
||||
@ -503,7 +503,7 @@ We recommend using this option in Mac OS X since the `getrlimit()` function retu
|
||||
<max_open_files>262144</max_open_files>
|
||||
```
|
||||
|
||||
## max\_table\_size\_to\_drop {#max-table-size-to-drop}
|
||||
## max_table_size_to_drop {#max-table-size-to-drop}
|
||||
|
||||
Restriction on deleting tables.
|
||||
|
||||
@ -521,7 +521,7 @@ The value 0 means that you can delete all tables without any restrictions.
|
||||
<max_table_size_to_drop>0</max_table_size_to_drop>
|
||||
```
|
||||
|
||||
## max\_thread\_pool\_size {#max-thread-pool-size}
|
||||
## max_thread_pool_size {#max-thread-pool-size}
|
||||
|
||||
The maximum number of threads in the Global Thread pool.
|
||||
|
||||
@ -533,7 +533,7 @@ Default value: 10000.
|
||||
<max_thread_pool_size>12000</max_thread_pool_size>
|
||||
```
|
||||
|
||||
## merge\_tree {#server_configuration_parameters-merge_tree}
|
||||
## merge_tree {#server_configuration_parameters-merge_tree}
|
||||
|
||||
Fine tuning for tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
||||
@ -547,7 +547,7 @@ For more information, see the MergeTreeSettings.h header file.
|
||||
</merge_tree>
|
||||
```
|
||||
|
||||
## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
|
||||
## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree}
|
||||
|
||||
Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
||||
@ -584,7 +584,7 @@ Keys for server/client settings:
|
||||
- sessionTimeout – Time for caching the session on the server.
|
||||
- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1\_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
|
||||
- requireTLSv1_2 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
|
||||
- fips – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS.
|
||||
- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
|
||||
@ -622,11 +622,11 @@ Keys for server/client settings:
|
||||
</openSSL>
|
||||
```
|
||||
|
||||
## part\_log {#server_configuration_parameters-part-log}
|
||||
## part_log {#server_configuration_parameters-part-log}
|
||||
|
||||
Logging events that are associated with [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process.
|
||||
|
||||
Queries are logged in the [system.part\_log](../../operations/system-tables/part_log.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below).
|
||||
Queries are logged in the [system.part_log](../../operations/system-tables/part_log.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below).
|
||||
|
||||
Use the following parameters to configure logging:
|
||||
|
||||
@ -670,7 +670,7 @@ Settings:
|
||||
- `port` – Port for `endpoint`.
|
||||
- `metrics` – Flag that enables exposing metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
|
||||
- `events` – Flag that enables exposing metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
|
||||
- `asynchronous_metrics` – Flag that sets to expose current metrics values from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
|
||||
- `asynchronous_metrics` – Flag that enables exposing current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -684,11 +684,11 @@ Settings:
|
||||
</prometheus>
|
||||
```
|
||||
|
||||
## query\_log {#server_configuration_parameters-query-log}
|
||||
## query_log {#server_configuration_parameters-query-log}
|
||||
|
||||
Setting for logging queries received with the [log\_queries=1](../../operations/settings/settings.md) setting.
|
||||
Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting.
|
||||
|
||||
Queries are logged in the [system.query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
|
||||
Queries are logged in the [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
|
||||
|
||||
Use the following parameters to configure logging:
|
||||
|
||||
@ -711,11 +711,11 @@ If the table doesn’t exist, ClickHouse will create it. If the structure of the
|
||||
</query_log>
|
||||
```
|
||||
|
||||
## query\_thread\_log {#server_configuration_parameters-query_thread_log}
|
||||
## query_thread_log {#server_configuration_parameters-query_thread_log}
|
||||
|
||||
Setting for logging threads of queries received with the [log\_query\_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting.
|
||||
Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting.
|
||||
|
||||
Queries are logged in the [system.query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
|
||||
Queries are logged in the [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
|
||||
|
||||
Use the following parameters to configure logging:
|
||||
|
||||
@ -738,9 +738,9 @@ If the table doesn’t exist, ClickHouse will create it. If the structure of the
|
||||
</query_thread_log>
|
||||
```
|
||||
|
||||
## text\_log {#server_configuration_parameters-text_log}
|
||||
## text_log {#server_configuration_parameters-text_log}
|
||||
|
||||
Settings for the [text\_log](../../operations/system-tables/text_log.md#system_tables-text_log) system table for logging text messages.
|
||||
Settings for the [text_log](../../operations/system-tables/text_log.md#system_tables-text_log) system table for logging text messages.
|
||||
|
||||
Parameters:
|
||||
|
||||
@ -766,9 +766,9 @@ Parameters:
|
||||
```
|
||||
|
||||
|
||||
## trace\_log {#server_configuration_parameters-trace_log}
|
||||
## trace_log {#server_configuration_parameters-trace_log}
|
||||
|
||||
Settings for the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
|
||||
Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
|
||||
|
||||
Parameters:
|
||||
|
||||
@ -789,7 +789,7 @@ The default server configuration file `config.xml` contains the following settin
|
||||
</trace_log>
|
||||
```
|
||||
|
||||
## query\_masking\_rules {#query-masking-rules}
|
||||
## query_masking_rules {#query-masking-rules}
|
||||
|
||||
Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs,
|
||||
`system.query_log`, `system.text_log`, `system.processes` tables, and in logs sent to the client. That allows preventing
|
||||
@ -820,7 +820,7 @@ The masking rules are applied to the whole query (to prevent leaks of sensitive
|
||||
For distributed queries, each server has to be configured separately; otherwise, subqueries passed to other
|
||||
nodes will be stored without masking.
|
||||
|
||||
## remote\_servers {#server-settings-remote-servers}
|
||||
## remote_servers {#server-settings-remote-servers}
|
||||
|
||||
Configuration of clusters used by the [Distributed](../../engines/table-engines/special/distributed.md) table engine and by the `cluster` table function.
|
||||
|
||||
@ -834,7 +834,7 @@ For the value of the `incl` attribute, see the section “[Configuration files](
|
||||
|
||||
**See Also**
|
||||
|
||||
- [skip\_unavailable\_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
|
||||
- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
|
||||
|
||||
## timezone {#server_configuration_parameters-timezone}
|
||||
|
||||
@ -850,7 +850,7 @@ The time zone is necessary for conversions between String and DateTime formats w
|
||||
<timezone>Europe/Moscow</timezone>
|
||||
```
|
||||
|
||||
## tcp\_port {#server_configuration_parameters-tcp_port}
|
||||
## tcp_port {#server_configuration_parameters-tcp_port}
|
||||
|
||||
Port for communicating with clients over the TCP protocol.
|
||||
|
||||
@ -860,7 +860,7 @@ Port for communicating with clients over the TCP protocol.
|
||||
<tcp_port>9000</tcp_port>
|
||||
```
|
||||
|
||||
## tcp\_port\_secure {#server_configuration_parameters-tcp_port_secure}
|
||||
## tcp_port_secure {#server_configuration_parameters-tcp_port_secure}
|
||||
|
||||
TCP port for secure communication with clients. Use it with [OpenSSL](#server_configuration_parameters-openssl) settings.
|
||||
|
||||
@ -874,7 +874,7 @@ Positive integer.
|
||||
<tcp_port_secure>9440</tcp_port_secure>
|
||||
```
|
||||
|
||||
## mysql\_port {#server_configuration_parameters-mysql_port}
|
||||
## mysql_port {#server_configuration_parameters-mysql_port}
|
||||
|
||||
Port for communicating with clients over MySQL protocol.
|
||||
|
||||
@ -888,7 +888,7 @@ Example
|
||||
<mysql_port>9004</mysql_port>
|
||||
```
|
||||
|
||||
## tmp\_path {#tmp-path}
|
||||
## tmp_path {#tmp-path}
|
||||
|
||||
Path to temporary data for processing large queries.
|
||||
|
||||
@ -901,11 +901,11 @@ Path to temporary data for processing large queries.
|
||||
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
|
||||
```
|
||||
|
||||
## tmp\_policy {#tmp-policy}
|
||||
## tmp_policy {#tmp-policy}
|
||||
|
||||
Policy from [storage\_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
|
||||
Policy from [storage_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
|
||||
|
||||
If not set, [tmp\_path](#tmp-path) is used, otherwise it is ignored.
|
||||
If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
|
||||
|
||||
!!! note "Note"
|
||||
- `move_factor` is ignored.
|
||||
@ -913,11 +913,11 @@ If not set, [tmp\_path](#tmp-path) is used, otherwise it is ignored.
|
||||
- `max_data_part_size_bytes` is ignored.
|
||||
- You must have exactly one volume in that policy.
|
||||
|
||||
## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size}
|
||||
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
|
||||
|
||||
Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
||||
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use\_uncompressed\_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) is enabled.
|
||||
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) is enabled.
|
||||
|
||||
The uncompressed cache is advantageous for very short queries in individual cases.
|
||||
|
||||
@ -927,7 +927,7 @@ The uncompressed cache is advantageous for very short queries in individual case
|
||||
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
|
||||
```
|
||||
|
||||
## user\_files\_path {#server_configuration_parameters-user_files_path}
|
||||
## user_files_path {#server_configuration_parameters-user_files_path}
|
||||
|
||||
The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md).
|
||||
|
||||
@ -937,7 +937,7 @@ The directory with user files. Used in the table function [file()](../../sql-ref
|
||||
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
|
||||
```
|
||||
|
||||
## users\_config {#users-config}
|
||||
## users_config {#users-config}
|
||||
|
||||
Path to the file that contains:
|
||||
|
||||
@ -1005,13 +1005,13 @@ This section contains the following parameters:
|
||||
- [Replication](../../engines/table-engines/mergetree-family/replication.md)
|
||||
- [ZooKeeper Programmer’s Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html)
|
||||
|
||||
## use\_minimalistic\_part\_header\_in\_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}
|
||||
## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}
|
||||
|
||||
Storage method for data part headers in ZooKeeper.
|
||||
|
||||
This setting only applies to the `MergeTree` family. It can be specified:
|
||||
|
||||
- Globally in the [merge\_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file.
|
||||
- Globally in the [merge_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file.
|
||||
|
||||
ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behaviour when the setting changes.
|
||||
|
||||
@ -1033,14 +1033,14 @@ If `use_minimalistic_part_header_in_zookeeper = 1`, then [replicated](../../engi
|
||||
|
||||
**Default value:** 0.
|
||||
|
||||
## disable\_internal\_dns\_cache {#server-settings-disable-internal-dns-cache}
|
||||
## disable_internal_dns_cache {#server-settings-disable-internal-dns-cache}
|
||||
|
||||
Disables the internal DNS cache. Recommended for operating ClickHouse in systems
|
||||
with frequently changing infrastructure such as Kubernetes.
|
||||
|
||||
**Default value:** 0.
|
||||
|
||||
## dns\_cache\_update\_period {#server-settings-dns-cache-update-period}
|
||||
## dns_cache_update_period {#server-settings-dns-cache-update-period}
|
||||
|
||||
The period of updating IP addresses stored in the ClickHouse internal DNS cache (in seconds).
|
||||
The update is performed asynchronously, in a separate system thread.
|
||||
@ -1049,9 +1049,9 @@ The update is performed asynchronously, in a separate system thread.
|
||||
|
||||
**See also**
|
||||
|
||||
- [background\_schedule\_pool\_size](../../operations/settings/settings.md#background_schedule_pool_size)
|
||||
- [background_schedule_pool_size](../../operations/settings/settings.md#background_schedule_pool_size)
|
||||
|
||||
## access\_control\_path {#access_control_path}
|
||||
## access_control_path {#access_control_path}
|
||||
|
||||
Path to a folder where a ClickHouse server stores user and role configurations created by SQL commands.
|
||||
|
||||
|
@ -16,7 +16,7 @@ Queries in ClickHouse can be divided into several types:
|
||||
The following settings regulate user permissions by the type of query:
|
||||
|
||||
- [readonly](#settings_readonly) — Restricts permissions for all types of queries except DDL queries.
|
||||
- [allow\_ddl](#settings_allow_ddl) — Restricts permissions for DDL queries.
|
||||
- [allow_ddl](#settings_allow_ddl) — Restricts permissions for DDL queries.
|
||||
|
||||
`KILL QUERY` can be performed with any settings.
|
||||
|
||||
@ -41,7 +41,7 @@ from changing only specific settings, for details see [constraints on settings](
|
||||
|
||||
Default value: 0
|
||||
|
||||
## allow\_ddl {#settings_allow_ddl}
|
||||
## allow_ddl {#settings_allow_ddl}
|
||||
|
||||
Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries.
|
||||
|
||||
|
@ -12,8 +12,8 @@ Almost all the restrictions only apply to `SELECT`. For distributed query proces
|
||||
ClickHouse checks the restrictions for data parts, not for each row. It means that you can exceed the value of restriction with the size of the data part.
|
||||
|
||||
Restrictions on the “maximum amount of something” can take the value 0, which means “unrestricted”.
|
||||
Most restrictions also have an ‘overflow\_mode’ setting, meaning what to do when the limit is exceeded.
|
||||
It can take one of two values: `throw` or `break`. Restrictions on aggregation (group\_by\_overflow\_mode) also have the value `any`.
|
||||
Most restrictions also have an ‘overflow_mode’ setting, meaning what to do when the limit is exceeded.
|
||||
It can take one of two values: `throw` or `break`. Restrictions on aggregation (group_by_overflow_mode) also have the value `any`.
|
||||
|
||||
`throw` – Throw an exception (default).
|
||||
|
||||
@ -21,7 +21,7 @@ It can take one of two values: `throw` or `break`. Restrictions on aggregation (
|
||||
|
||||
`any (only for group_by_overflow_mode)` – Continue aggregation for the keys that got into the set, but do not add new keys to the set.
|
||||
|
||||
## max\_memory\_usage {#settings_max_memory_usage}
|
||||
## max_memory_usage {#settings_max_memory_usage}
|
||||
|
||||
The maximum amount of RAM to use for running a query on a single server.
|
||||
|
||||
@ -36,31 +36,31 @@ Memory usage is not monitored for the states of certain aggregate functions.
|
||||
|
||||
Memory usage is not fully tracked for states of the aggregate functions `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` from `String` and `Array` arguments.
|
||||
|
||||
Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and [max\_server\_memory\_usage](../../operations/server-configuration-parameters/settings.md#max_server_memory_usage).
|
||||
Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and [max_server_memory_usage](../../operations/server-configuration-parameters/settings.md#max_server_memory_usage).
|
||||
|
||||
## max\_memory\_usage\_for\_user {#max-memory-usage-for-user}
|
||||
## max_memory_usage_for_user {#max-memory-usage-for-user}
|
||||
|
||||
The maximum amount of RAM to use for running a user’s queries on a single server.
|
||||
|
||||
Default values are defined in [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Settings.h#L288). By default, the amount is not restricted (`max_memory_usage_for_user = 0`).
|
||||
|
||||
See also the description of [max\_memory\_usage](#settings_max_memory_usage).
|
||||
See also the description of [max_memory_usage](#settings_max_memory_usage).
|
||||
|
||||
## max\_rows\_to\_read {#max-rows-to-read}
|
||||
## max_rows_to_read {#max-rows-to-read}
|
||||
|
||||
The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be broken a little.
|
||||
|
||||
A maximum number of rows that can be read from a table when running a query.
|
||||
|
||||
## max\_bytes\_to\_read {#max-bytes-to-read}
|
||||
## max_bytes_to_read {#max-bytes-to-read}
|
||||
|
||||
A maximum number of bytes (uncompressed data) that can be read from a table when running a query.
|
||||
|
||||
## read\_overflow\_mode {#read-overflow-mode}
|
||||
## read_overflow_mode {#read-overflow-mode}
|
||||
|
||||
What to do when the volume of data read exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
|
||||
|
||||
## max\_rows\_to\_read_leaf {#max-rows-to-read-leaf}
|
||||
## max_rows_to_read_leaf {#max-rows-to-read-leaf}
|
||||
|
||||
The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be broken a little.
|
||||
|
||||
@ -71,7 +71,7 @@ and each shard contains a table with 100 rows. Then distributed query which supp
|
||||
tables with setting `max_rows_to_read=150` will fail as in total it will be 200 rows. While query
|
||||
with `max_rows_to_read_leaf=150` will succeed since leaf nodes will read 100 rows at max.
|
||||
|
||||
## max\_bytes\_to\_read_leaf {#max-bytes-to-read-leaf}
|
||||
## max_bytes_to_read_leaf {#max-bytes-to-read-leaf}
|
||||
|
||||
A maximum number of bytes (uncompressed data) that can be read from a local table on a leaf node when running
|
||||
a distributed query. While distributed queries can issue multiple sub-queries to each shard (leaf) - this limit will
|
||||
@ -81,20 +81,20 @@ Then distributed query which suppose to read all the data from both tables with
|
||||
as in total it will be 200 bytes. While query with `max_bytes_to_read_leaf=150` will succeed since leaf nodes will read
|
||||
100 bytes at max.
|
||||
|
||||
## read\_overflow\_mode_leaf {#read-overflow-mode-leaf}
|
||||
## read_overflow_mode_leaf {#read-overflow-mode-leaf}
|
||||
|
||||
What to do when the volume of data read exceeds one of the leaf limits: ‘throw’ or ‘break’. By default, throw.
|
||||
|
||||
## max\_rows\_to\_group\_by {#settings-max-rows-to-group-by}
|
||||
## max_rows_to_group_by {#settings-max-rows-to-group-by}
|
||||
|
||||
A maximum number of unique keys received from aggregation. This setting lets you limit memory consumption when aggregating.
|
||||
|
||||
## group\_by\_overflow\_mode {#group-by-overflow-mode}
|
||||
## group_by_overflow_mode {#group-by-overflow-mode}
|
||||
|
||||
What to do when the number of unique keys for aggregation exceeds the limit: ‘throw’, ‘break’, or ‘any’. By default, throw.
|
||||
Using the ‘any’ value lets you run an approximation of GROUP BY. The quality of this approximation depends on the statistical nature of the data.
|
||||
|
||||
## max\_bytes\_before\_external\_group\_by {#settings-max_bytes_before_external_group_by}
|
||||
## max_bytes_before_external_group_by {#settings-max_bytes_before_external_group_by}
|
||||
|
||||
Enables or disables execution of `GROUP BY` clauses in external memory. See [GROUP BY in external memory](../../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory).
|
||||
|
||||
@ -105,31 +105,31 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## max\_rows\_to\_sort {#max-rows-to-sort}
|
||||
## max_rows_to_sort {#max-rows-to-sort}
|
||||
|
||||
A maximum number of rows before sorting. This allows you to limit memory consumption when sorting.
|
||||
|
||||
## max\_bytes\_to\_sort {#max-bytes-to-sort}
|
||||
## max_bytes_to_sort {#max-bytes-to-sort}
|
||||
|
||||
A maximum number of bytes before sorting.
|
||||
|
||||
## sort\_overflow\_mode {#sort-overflow-mode}
|
||||
## sort_overflow_mode {#sort-overflow-mode}
|
||||
|
||||
What to do if the number of rows received before sorting exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
|
||||
|
||||
## max\_result\_rows {#setting-max_result_rows}
|
||||
## max_result_rows {#setting-max_result_rows}
|
||||
|
||||
Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query.
|
||||
|
||||
## max\_result\_bytes {#max-result-bytes}
|
||||
## max_result_bytes {#max-result-bytes}
|
||||
|
||||
Limit on the number of bytes in the result. The same as the previous setting.
|
||||
|
||||
## result\_overflow\_mode {#result-overflow-mode}
|
||||
## result_overflow_mode {#result-overflow-mode}
|
||||
|
||||
What to do if the volume of the result exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
|
||||
|
||||
Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max\_result\_rows](#setting-max_result_rows), multiple of [max\_block\_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max\_threads](../../operations/settings/settings.md#settings-max_threads).
|
||||
Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that the number of returned rows is greater than [max_result_rows](#setting-max_result_rows), is a multiple of [max_block_size](../../operations/settings/settings.md#setting-max_block_size), and depends on [max_threads](../../operations/settings/settings.md#settings-max_threads).
|
||||
|
||||
Example:
|
||||
|
||||
@ -148,103 +148,103 @@ Result:
|
||||
6666 rows in set. ...
|
||||
```
|
||||
|
||||
## max\_execution\_time {#max-execution-time}
|
||||
## max_execution_time {#max-execution-time}
|
||||
|
||||
Maximum query execution time in seconds.
|
||||
At this time, it is not checked for one of the sorting stages, or when merging and finalizing aggregate functions.
|
||||
|
||||
## timeout\_overflow\_mode {#timeout-overflow-mode}
|
||||
## timeout_overflow_mode {#timeout-overflow-mode}
|
||||
|
||||
What to do if the query is run longer than ‘max\_execution\_time’: ‘throw’ or ‘break’. By default, throw.
|
||||
What to do if the query is run longer than ‘max_execution_time’: ‘throw’ or ‘break’. By default, throw.
|
||||
|
||||
## min\_execution\_speed {#min-execution-speed}
|
||||
## min_execution_speed {#min-execution-speed}
|
||||
|
||||
Minimal execution speed in rows per second. Checked on every data block when ‘timeout\_before\_checking\_execution\_speed’ expires. If the execution speed is lower, an exception is thrown.
|
||||
Minimal execution speed in rows per second. Checked on every data block when ‘timeout_before_checking_execution_speed’ expires. If the execution speed is lower, an exception is thrown.
|
||||
|
||||
## min\_execution\_speed\_bytes {#min-execution-speed-bytes}
|
||||
## min_execution_speed_bytes {#min-execution-speed-bytes}
|
||||
|
||||
A minimum number of execution bytes per second. Checked on every data block when ‘timeout\_before\_checking\_execution\_speed’ expires. If the execution speed is lower, an exception is thrown.
|
||||
A minimum number of execution bytes per second. Checked on every data block when ‘timeout_before_checking_execution_speed’ expires. If the execution speed is lower, an exception is thrown.
|
||||
|
||||
## max\_execution\_speed {#max-execution-speed}
|
||||
## max_execution_speed {#max-execution-speed}
|
||||
|
||||
A maximum number of execution rows per second. Checked on every data block when ‘timeout\_before\_checking\_execution\_speed’ expires. If the execution speed is high, the execution speed will be reduced.
|
||||
A maximum number of execution rows per second. Checked on every data block when ‘timeout_before_checking_execution_speed’ expires. If the execution speed is high, the execution speed will be reduced.
|
||||
|
||||
## max\_execution\_speed\_bytes {#max-execution-speed-bytes}
|
||||
## max_execution_speed_bytes {#max-execution-speed-bytes}
|
||||
|
||||
A maximum number of execution bytes per second. Checked on every data block when ‘timeout\_before\_checking\_execution\_speed’ expires. If the execution speed is high, the execution speed will be reduced.
|
||||
A maximum number of execution bytes per second. Checked on every data block when ‘timeout_before_checking_execution_speed’ expires. If the execution speed is high, the execution speed will be reduced.
|
||||
|
||||
## timeout\_before\_checking\_execution\_speed {#timeout-before-checking-execution-speed}
|
||||
## timeout_before_checking_execution_speed {#timeout-before-checking-execution-speed}
|
||||
|
||||
Checks that execution speed is not too slow (no less than ‘min\_execution\_speed’), after the specified time in seconds has expired.
|
||||
Checks that execution speed is not too slow (no less than ‘min_execution_speed’), after the specified time in seconds has expired.
|
||||
|
||||
## max\_columns\_to\_read {#max-columns-to-read}
|
||||
## max_columns_to_read {#max-columns-to-read}
|
||||
|
||||
A maximum number of columns that can be read from a table in a single query. If a query requires reading a greater number of columns, it throws an exception.
|
||||
|
||||
## max\_temporary\_columns {#max-temporary-columns}
|
||||
## max_temporary_columns {#max-temporary-columns}
|
||||
|
||||
A maximum number of temporary columns that must be kept in RAM at the same time when running a query, including constant columns. If there are more temporary columns than this, it throws an exception.
|
||||
|
||||
## max\_temporary\_non\_const\_columns {#max-temporary-non-const-columns}
|
||||
## max_temporary_non_const_columns {#max-temporary-non-const-columns}
|
||||
|
||||
The same thing as ‘max\_temporary\_columns’, but without counting constant columns.
|
||||
The same thing as ‘max_temporary_columns’, but without counting constant columns.
|
||||
Note that constant columns are formed fairly often when running a query, but they require approximately zero computing resources.
|
||||
|
||||
## max\_subquery\_depth {#max-subquery-depth}
|
||||
## max_subquery_depth {#max-subquery-depth}
|
||||
|
||||
Maximum nesting depth of subqueries. If subqueries are deeper, an exception is thrown. By default, 100.
|
||||
|
||||
## max\_pipeline\_depth {#max-pipeline-depth}
|
||||
## max_pipeline_depth {#max-pipeline-depth}
|
||||
|
||||
Maximum pipeline depth. Corresponds to the number of transformations that each data block goes through during query processing. Counted within the limits of a single server. If the pipeline depth is greater, an exception is thrown. By default, 1000.
|
||||
|
||||
## max\_ast\_depth {#max-ast-depth}
|
||||
## max_ast_depth {#max-ast-depth}
|
||||
|
||||
Maximum nesting depth of a query syntactic tree. If exceeded, an exception is thrown.
|
||||
At this time, it isn’t checked during parsing, but only after parsing the query. That is, a syntactic tree that is too deep can be created during parsing, but the query will fail. By default, 1000.
|
||||
|
||||
## max\_ast\_elements {#max-ast-elements}
|
||||
## max_ast_elements {#max-ast-elements}
|
||||
|
||||
A maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown.
|
||||
In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000.
|
||||
|
||||
## max\_rows\_in\_set {#max-rows-in-set}
|
||||
## max_rows_in_set {#max-rows-in-set}
|
||||
|
||||
A maximum number of rows for a data set in the IN clause created from a subquery.
|
||||
|
||||
## max\_bytes\_in\_set {#max-bytes-in-set}
|
||||
## max_bytes_in_set {#max-bytes-in-set}
|
||||
|
||||
A maximum number of bytes (uncompressed data) used by a set in the IN clause created from a subquery.
|
||||
|
||||
## set\_overflow\_mode {#set-overflow-mode}
|
||||
## set_overflow_mode {#set-overflow-mode}
|
||||
|
||||
What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
|
||||
|
||||
## max\_rows\_in\_distinct {#max-rows-in-distinct}
|
||||
## max_rows_in_distinct {#max-rows-in-distinct}
|
||||
|
||||
A maximum number of different rows when using DISTINCT.
|
||||
|
||||
## max\_bytes\_in\_distinct {#max-bytes-in-distinct}
|
||||
## max_bytes_in_distinct {#max-bytes-in-distinct}
|
||||
|
||||
A maximum number of bytes used by a hash table when using DISTINCT.
|
||||
|
||||
## distinct\_overflow\_mode {#distinct-overflow-mode}
|
||||
## distinct_overflow_mode {#distinct-overflow-mode}
|
||||
|
||||
What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
|
||||
|
||||
## max\_rows\_to\_transfer {#max-rows-to-transfer}
|
||||
## max_rows_to_transfer {#max-rows-to-transfer}
|
||||
|
||||
A maximum number of rows that can be passed to a remote server or saved in a temporary table when using GLOBAL IN.
|
||||
|
||||
## max\_bytes\_to\_transfer {#max-bytes-to-transfer}
|
||||
## max_bytes_to_transfer {#max-bytes-to-transfer}
|
||||
|
||||
A maximum number of bytes (uncompressed data) that can be passed to a remote server or saved in a temporary table when using GLOBAL IN.
|
||||
|
||||
## transfer\_overflow\_mode {#transfer-overflow-mode}
|
||||
## transfer_overflow_mode {#transfer-overflow-mode}
|
||||
|
||||
What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
|
||||
|
||||
## max\_rows\_in\_join {#settings-max_rows_in_join}
|
||||
## max_rows_in_join {#settings-max_rows_in_join}
|
||||
|
||||
Limits the number of rows in the hash table that is used when joining tables.
|
||||
|
||||
@ -252,7 +252,7 @@ This settings applies to [SELECT … JOIN](../../sql-reference/statements/select
|
||||
|
||||
If a query contains multiple joins, ClickHouse checks this setting for every intermediate result.
|
||||
|
||||
ClickHouse can proceed with different actions when the limit is reached. Use the [join\_overflow\_mode](#settings-join_overflow_mode) setting to choose the action.
|
||||
ClickHouse can proceed with different actions when the limit is reached. Use the [join_overflow_mode](#settings-join_overflow_mode) setting to choose the action.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -261,7 +261,7 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## max\_bytes\_in\_join {#settings-max_bytes_in_join}
|
||||
## max_bytes_in_join {#settings-max_bytes_in_join}
|
||||
|
||||
Limits the size in bytes of the hash table used when joining tables.
|
||||
|
||||
@ -269,7 +269,7 @@ This settings applies to [SELECT … JOIN](../../sql-reference/statements/select
|
||||
|
||||
If the query contains joins, ClickHouse checks this setting for every intermediate result.
|
||||
|
||||
ClickHouse can proceed with different actions when the limit is reached. Use [join\_overflow\_mode](#settings-join_overflow_mode) settings to choose the action.
|
||||
ClickHouse can proceed with different actions when the limit is reached. Use the [join_overflow_mode](#settings-join_overflow_mode) setting to choose the action.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -278,12 +278,12 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## join\_overflow\_mode {#settings-join_overflow_mode}
|
||||
## join_overflow_mode {#settings-join_overflow_mode}
|
||||
|
||||
Defines what action ClickHouse performs when any of the following join limits is reached:
|
||||
|
||||
- [max\_bytes\_in\_join](#settings-max_bytes_in_join)
|
||||
- [max\_rows\_in\_join](#settings-max_rows_in_join)
|
||||
- [max_bytes_in_join](#settings-max_bytes_in_join)
|
||||
- [max_rows_in_join](#settings-max_rows_in_join)
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -297,7 +297,7 @@ Default value: `THROW`.
|
||||
- [JOIN clause](../../sql-reference/statements/select/join.md#select-join)
|
||||
- [Join table engine](../../engines/table-engines/special/join.md)
|
||||
|
||||
## max\_partitions\_per\_insert\_block {#max-partitions-per-insert-block}
|
||||
## max_partitions_per_insert_block {#max-partitions-per-insert-block}
|
||||
|
||||
Limits the maximum number of partitions in a single inserted block.
|
||||
|
||||
@ -310,6 +310,6 @@ Default value: 100.
|
||||
|
||||
When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse throws an exception with the following text:
|
||||
|
||||
> “Too many partitions for single INSERT block (more than” + toString(max\_parts) + “). The limit is controlled by ‘max\_partitions\_per\_insert\_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
|
||||
> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/settings/query_complexity/) <!--hide-->
|
||||
|
@ -41,7 +41,7 @@ Structure of the `users` section:
|
||||
</users>
|
||||
```
|
||||
|
||||
### user\_name/password {#user-namepassword}
|
||||
### user_name/password {#user-namepassword}
|
||||
|
||||
Password can be specified in plaintext or in SHA256 (hex format).
|
||||
|
||||
@ -73,7 +73,7 @@ Password can be specified in plaintext or in SHA256 (hex format).
|
||||
|
||||
The first line of the result is the password. The second line is the corresponding double SHA1 hash.
|
||||
|
||||
### access\_management {#access_management-user-setting}
|
||||
### access_management {#access_management-user-setting}
|
||||
|
||||
This setting enables or disables the use of SQL-driven [access control and account management](../../operations/access-rights.md#access-control) for the user.
|
||||
|
||||
@ -84,7 +84,7 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
### user\_name/networks {#user-namenetworks}
|
||||
### user_name/networks {#user-namenetworks}
|
||||
|
||||
List of networks from which the user can connect to the ClickHouse server.
|
||||
|
||||
@ -126,18 +126,18 @@ To open access only from localhost, specify:
|
||||
<ip>127.0.0.1</ip>
|
||||
```
|
||||
|
||||
### user\_name/profile {#user-nameprofile}
|
||||
### user_name/profile {#user-nameprofile}
|
||||
|
||||
You can assign a settings profile for the user. Settings profiles are configured in a separate section of the `users.xml` file. For more information, see [Profiles of Settings](../../operations/settings/settings-profiles.md).
|
||||
|
||||
### user\_name/quota {#user-namequota}
|
||||
### user_name/quota {#user-namequota}
|
||||
|
||||
Quotas allow you to track or limit resource usage over a period of time. Quotas are configured in the `quotas`
|
||||
section of the `users.xml` configuration file.
|
||||
|
||||
You can assign a quotas set for the user. For a detailed description of quotas configuration, see [Quotas](../../operations/quotas.md#quotas).
|
||||
|
||||
### user\_name/databases {#user-namedatabases}
|
||||
### user_name/databases {#user-namedatabases}
|
||||
|
||||
In this section, you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -33,6 +33,6 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
|
||||
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
|
||||
- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
|
||||
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred.
|
||||
- [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
|
||||
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metrics) <!--hide-->
|
@ -20,7 +20,7 @@ Please note that `errors_count` is updated once per query to the cluster, but `e
|
||||
**See also**
|
||||
|
||||
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
|
||||
- [distributed\_replica\_error\_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap)
|
||||
- [distributed\_replica\_error\_half\_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life)
|
||||
- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap)
|
||||
- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/clusters) <!--hide-->
|
||||
|
@ -26,9 +26,9 @@ SELECT * FROM system.events LIMIT 5
|
||||
|
||||
**See Also**
|
||||
|
||||
- [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
|
||||
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
|
||||
- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
|
||||
- [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
|
||||
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
|
||||
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/events) <!--hide-->
|
||||
|
@ -1,6 +1,6 @@
|
||||
# system.graphite_retentions {#system-graphite-retentions}
|
||||
|
||||
Contains information about parameters [graphite\_rollup](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) which are used in tables with [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md) engines.
|
||||
Contains information about parameters [graphite_rollup](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) which are used in tables with [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md) engines.
|
||||
|
||||
Columns:
|
||||
|
||||
|
@ -20,7 +20,7 @@ System tables:
|
||||
|
||||
Most system tables store their data in RAM. A ClickHouse server creates such system tables at startup.
|
||||
|
||||
Unlike other system tables, the system tables [metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query\_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
|
||||
Unlike other system tables, the system tables [metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) are served by the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from the filesystem, the ClickHouse server creates an empty one again at the time of the next data write. If a system table's schema changes in a new release, ClickHouse renames the current table and creates a new one.
|
||||
|
||||
By default, table growth is unlimited. To control the size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. Also, you can use the partitioning feature of `MergeTree`-engine tables.
|
||||
|
||||
|
@ -49,7 +49,7 @@ CurrentMetric_DistributedFilesToInsert: 0
|
||||
|
||||
**See also**
|
||||
|
||||
- [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics.
|
||||
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics.
|
||||
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
|
||||
- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics.
|
||||
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
|
||||
|
@ -33,9 +33,9 @@ SELECT * FROM system.metrics LIMIT 10
|
||||
|
||||
**See Also**
|
||||
|
||||
- [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
|
||||
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
|
||||
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
|
||||
- [system.metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
|
||||
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
|
||||
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metrics) <!--hide-->
|
||||
|
@ -1,6 +1,6 @@
|
||||
# system.part_log {#system_tables-part-log}
|
||||
|
||||
The `system.part_log` table is created only if the [part\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-part-log) server setting is specified.
|
||||
The `system.part_log` table is created only if the [part_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-part-log) server setting is specified.
|
||||
|
||||
This table contains information about events that occurred with [data parts](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family tables, such as adding or merging data.
|
||||
|
||||
|
@ -10,7 +10,7 @@ Columns:
|
||||
- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
|
||||
- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
|
||||
- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known.
|
||||
- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
|
||||
- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
|
||||
- `query` (String) – The query text. For `INSERT`, it doesn’t include the data to insert.
|
||||
- `query_id` (String) – Query ID, if defined.
|
||||
|
||||
|
@ -5,11 +5,11 @@ Contains information about executed queries, for example, start time, duration o
|
||||
!!! note "Note"
|
||||
This table doesn’t contain the ingested data for `INSERT` queries.
|
||||
|
||||
You can change settings of queries logging in the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration.
|
||||
You can change settings of queries logging in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration.
|
||||
|
||||
You can disable queries logging by setting [log\_queries = 0](../../operations/settings/settings.md#settings-log-queries). We don’t recommend to turn off logging because information in this table is important for solving issues.
|
||||
You can disable queries logging by setting [log_queries = 0](../../operations/settings/settings.md#settings-log-queries). We don’t recommend turning off logging because information in this table is important for solving issues.
|
||||
|
||||
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
|
||||
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
|
||||
|
||||
ClickHouse doesn’t delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
|
||||
|
||||
@ -140,7 +140,7 @@ Settings.Values: ['0','random','1','10000000000','1']
|
||||
|
||||
**See Also**
|
||||
|
||||
- [system.query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
|
||||
- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_log) <!--hide-->
|
||||
|
||||
|
@ -4,10 +4,10 @@ Contains information about threads which execute queries, for example, thread na
|
||||
|
||||
To start logging:
|
||||
|
||||
1. Configure parameters in the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section.
|
||||
2. Set [log\_query\_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1.
|
||||
1. Configure parameters in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section.
|
||||
2. Set [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1.
|
||||
|
||||
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
|
||||
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
|
||||
|
||||
ClickHouse doesn’t delete data from the table automatically. See [Introduction](../../operations/system-tables/index.md#system-tables-introduction) for more details.
|
||||
|
||||
@ -113,6 +113,6 @@ ProfileEvents.Values: [1,1,11,11,591,148,3,71,29,6533808,1,11,72,18,47,
|
||||
|
||||
**See Also**
|
||||
|
||||
- [system.query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution.
|
||||
- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_thread_log) <!--hide-->
|
||||
|
@ -63,7 +63,7 @@ Columns:
|
||||
- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged.
|
||||
- `zookeeper_path` (`String`) - Path to table data in ZooKeeper.
|
||||
- `replica_name` (`String`) - Replica name in ZooKeeper. Different replicas of the same table have different names.
|
||||
- `replica_path` (`String`) - Path to replica data in ZooKeeper. The same as concatenating ‘zookeeper\_path/replicas/replica\_path’.
|
||||
- `replica_path` (`String`) - Path to replica data in ZooKeeper. The same as concatenating ‘zookeeper_path/replicas/replica_name’.
|
||||
- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven’t made all of the ALTERs yet.
|
||||
- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`.
|
||||
- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong.
|
||||
@ -84,7 +84,7 @@ The next 4 columns have a non-zero value only where there is an active session w
|
||||
- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
|
||||
|
||||
If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.
|
||||
If you don’t request the last 4 columns (log\_max\_index, log\_pointer, total\_replicas, active\_replicas), the table works quickly.
|
||||
If you don’t request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly.
|
||||
|
||||
For example, you can check that everything is working correctly like this:
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
# system.text\_log {#system_tables-text_log}
|
||||
# system.text_log {#system_tables-text_log}
|
||||
|
||||
Contains logging entries. Logging level which goes to this table can be limited with `text_log.level` server setting.
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
Contains stack traces collected by the sampling query profiler.
|
||||
|
||||
ClickHouse creates this table when the [trace\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query\_profiler\_real\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query\_profiler\_cpu\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
|
||||
ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
|
||||
|
||||
To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
|
||||
|
||||
@ -27,7 +27,7 @@ Columns:
|
||||
|
||||
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
|
||||
|
||||
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query\_log](#system_tables-query_log) system table.
|
||||
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table.
|
||||
|
||||
- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process.
|
||||
|
||||
|
@ -57,7 +57,7 @@ When creating RAID-10, select the `far` layout.
|
||||
If your budget allows, choose RAID-10.
|
||||
|
||||
If you have more than 4 disks, use RAID-6 (preferred) or RAID-50, instead of RAID-5.
|
||||
When using RAID-5, RAID-6 or RAID-50, always increase stripe\_cache\_size, since the default value is usually not the best choice.
|
||||
When using RAID-5, RAID-6 or RAID-50, always increase stripe_cache_size, since the default value is usually not the best choice.
|
||||
|
||||
``` bash
|
||||
$ echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size
|
||||
|
@ -103,7 +103,7 @@ Check:
|
||||
|
||||
- Endpoint settings.
|
||||
|
||||
Check [listen\_host](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-listen_host) and [tcp\_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) settings.
|
||||
Check [listen_host](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-listen_host) and [tcp_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) settings.
|
||||
|
||||
ClickHouse server accepts localhost connections only by default.
|
||||
|
||||
@ -115,7 +115,7 @@ Check:
|
||||
|
||||
Check:
|
||||
|
||||
- The [tcp\_port\_secure](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting.
|
||||
- The [tcp_port_secure](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting.
|
||||
- Settings for [SSL certificates](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).
|
||||
|
||||
Use proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`.
|
||||
|
@ -492,6 +492,6 @@ Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >=
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/agg_functions/parametric_functions/) <!--hide-->
|
||||
|
||||
## sumMapFiltered(keys\_to\_keep)(keys, values) {#summapfilteredkeys-to-keepkeys-values}
|
||||
## sumMapFiltered(keys_to_keep)(keys, values) {#summapfilteredkeys-to-keepkeys-values}
|
||||
|
||||
Same behavior as [sumMap](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys.
|
||||
|
@ -26,7 +26,7 @@ In both cases the type of the returned value is [UInt64](../../../sql-reference/
|
||||
|
||||
**Details**
|
||||
|
||||
ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count\_distinct\_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function.
|
||||
ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count_distinct_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) function.
|
||||
|
||||
The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it.
|
||||
|
||||
|
@ -34,8 +34,8 @@ The following table lists cases when query feature works in ClickHouse, but beha
|
||||
| E021-01 | CHARACTER data type | No{.text-danger} | |
|
||||
| E021-02 | CHARACTER VARYING data type | No{.text-danger} | `String` behaves similarly, but without length limit in parentheses |
|
||||
| E021-03 | Character literals | Partial{.text-warning} | No automatic concatenation of consecutive literals and character set support |
|
||||
| E021-04 | CHARACTER\_LENGTH function | Partial{.text-warning} | No `USING` clause |
|
||||
| E021-05 | OCTET\_LENGTH function | No{.text-danger} | `LENGTH` behaves similarly |
|
||||
| E021-04 | CHARACTER_LENGTH function | Partial{.text-warning} | No `USING` clause |
|
||||
| E021-05 | OCTET_LENGTH function | No{.text-danger} | `LENGTH` behaves similarly |
|
||||
| E021-06 | SUBSTRING | Partial{.text-warning} | No support for `SIMILAR` and `ESCAPE` clauses, no `SUBSTRING_REGEX` variant |
|
||||
| E021-07 | Character concatenation | Partial{.text-warning} | No `COLLATE` clause |
|
||||
| E021-08 | UPPER and LOWER functions | Yes{.text-success} | |
|
||||
@ -152,7 +152,7 @@ The following table lists cases when query feature works in ClickHouse, but beha
|
||||
| F051-03 | TIMESTAMP data type (including support of TIMESTAMP literal) with fractional seconds precision of at least 0 and 6 | No{.text-danger} | `DateTime64` time provides similar functionality |
|
||||
| F051-04 | Comparison predicate on DATE, TIME, and TIMESTAMP data types | Partial{.text-warning} | Only one data type available |
|
||||
| F051-05 | Explicit CAST between datetime types and character string types | Yes{.text-success} | |
|
||||
| F051-06 | CURRENT\_DATE | No{.text-danger} | `today()` is similar |
|
||||
| F051-06 | CURRENT_DATE | No{.text-danger} | `today()` is similar |
|
||||
| F051-07 | LOCALTIME | No{.text-danger} | `now()` is similar |
|
||||
| F051-08 | LOCALTIMESTAMP | No{.text-danger} | |
|
||||
| **F081** | **UNION and EXCEPT in views** | **Partial**{.text-warning} | |
|
||||
|
@ -27,9 +27,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t
|
||||
|
||||
The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter.
|
||||
|
||||
ClickHouse outputs values in `YYYY-MM-DD hh:mm:ss` text format by default. You can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
|
||||
ClickHouse outputs values depending on the value of the [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. The default is the `YYYY-MM-DD hh:mm:ss` text format. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function.
|
||||
|
||||
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date\_time\_input\_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
|
||||
When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting.
|
||||
|
||||
## Examples {#examples}
|
||||
|
||||
@ -120,6 +120,7 @@ FROM dt
|
||||
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
|
||||
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
|
||||
- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format)
|
||||
- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format)
|
||||
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
|
||||
- [The `Date` data type](../../sql-reference/data-types/date.md)
|
||||
|
@ -96,6 +96,7 @@ FROM dt
|
||||
- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
|
||||
- [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
|
||||
- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format)
|
||||
- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format)
|
||||
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
|
||||
- [`Date` data type](../../sql-reference/data-types/date.md)
|
||||
|
@ -20,7 +20,7 @@ The `FixedString` type is efficient when data has the length of precisely `N` by
|
||||
Examples of the values that can be efficiently stored in `FixedString`-typed columns:
|
||||
|
||||
- The binary representation of IP addresses (`FixedString(16)` for IPv6).
|
||||
- Language codes (ru\_RU, en\_US … ).
|
||||
- Language codes (ru_RU, en_US … ).
|
||||
- Currency codes (USD, RUB … ).
|
||||
- Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256).
|
||||
|
||||
|
@ -10,6 +10,6 @@ ClickHouse can store various kinds of data in table cells.
|
||||
|
||||
This section describes the supported data types and special considerations for using and/or implementing them if any.
|
||||
|
||||
You can check whether data type name is case-sensitive in the [system.data\_type\_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
|
||||
You can check whether data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/data_types/) <!--hide-->
|
||||
|
@ -15,7 +15,7 @@ LowCardinality(data_type)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `data_type` — [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), and numbers excepting [Decimal](../../sql-reference/data-types/decimal.md). `LowCardinality` is not efficient for some data types, see the [allow\_suspicious\_low\_cardinality\_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description.
|
||||
- `data_type` — [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), and numbers excepting [Decimal](../../sql-reference/data-types/decimal.md). `LowCardinality` is not efficient for some data types, see the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description.
|
||||
|
||||
## Description {#lowcardinality-dscr}
|
||||
|
||||
@ -43,10 +43,10 @@ ORDER BY id
|
||||
|
||||
Settings:
|
||||
|
||||
- [low\_cardinality\_max\_dictionary\_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size)
|
||||
- [low\_cardinality\_use\_single\_dictionary\_for\_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part)
|
||||
- [low\_cardinality\_allow\_in\_native\_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format)
|
||||
- [allow\_suspicious\_low\_cardinality\_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)
|
||||
- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size)
|
||||
- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part)
|
||||
- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format)
|
||||
- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types)
|
||||
|
||||
Functions:
|
||||
|
||||
|
@ -25,7 +25,7 @@ Look at the following hierarchical structure:
|
||||
|
||||
This hierarchy can be expressed as the following dictionary table.
|
||||
|
||||
| region\_id | parent\_region | region\_name |
|
||||
| region_id | parent_region | region_name |
|
||||
|------------|----------------|---------------|
|
||||
| 1 | 0 | Russia |
|
||||
| 2 | 1 | Moscow |
|
||||
|
@ -7,7 +7,7 @@ toc_title: Storing Dictionaries in Memory
|
||||
|
||||
There are a variety of ways to store dictionaries in memory.
|
||||
|
||||
We recommend [flat](#flat), [hashed](#dicts-external_dicts_dict_layout-hashed) and [complex\_key\_hashed](#complex-key-hashed). which provide optimal processing speed.
|
||||
We recommend [flat](#flat), [hashed](#dicts-external_dicts_dict_layout-hashed) and [complex_key_hashed](#complex-key-hashed), which provide optimal processing speed.
|
||||
|
||||
Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more in the section “[cache](#cache)”.
|
||||
|
||||
@ -52,16 +52,16 @@ LAYOUT(LAYOUT_TYPE(param value)) -- layout settings
|
||||
|
||||
- [flat](#flat)
|
||||
- [hashed](#dicts-external_dicts_dict_layout-hashed)
|
||||
- [sparse\_hashed](#dicts-external_dicts_dict_layout-sparse_hashed)
|
||||
- [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed)
|
||||
- [cache](#cache)
|
||||
- [ssd\_cache](#ssd-cache)
|
||||
- [ssd_cache](#ssd-cache)
|
||||
- [direct](#direct)
|
||||
- [range\_hashed](#range-hashed)
|
||||
- [complex\_key\_hashed](#complex-key-hashed)
|
||||
- [complex\_key\_cache](#complex-key-cache)
|
||||
- [ssd\_complex\_key\_cache](#ssd-cache)
|
||||
- [complex\_key\_direct](#complex-key-direct)
|
||||
- [ip\_trie](#ip-trie)
|
||||
- [range_hashed](#range-hashed)
|
||||
- [complex_key_hashed](#complex-key-hashed)
|
||||
- [complex_key_cache](#complex-key-cache)
|
||||
- [complex_key_ssd_cache](#complex-key-ssd-cache)
|
||||
- [complex_key_direct](#complex-key-direct)
|
||||
- [ip_trie](#ip-trie)
|
||||
|
||||
### flat {#flat}
|
||||
|
||||
@ -91,6 +91,8 @@ LAYOUT(FLAT())
|
||||
|
||||
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
|
||||
|
||||
The hash table will be preallocated (this makes the dictionary load faster) if the approximate total number of rows is known. This is supported only if the source is `clickhouse` without any `<where>` (since with `<where>` too many rows can be filtered out, and the dictionary would allocate too much memory that will not be used eventually).
|
||||
|
||||
All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
|
||||
|
||||
Configuration example:
|
||||
@ -107,10 +109,12 @@ or
|
||||
LAYOUT(HASHED())
|
||||
```
|
||||
|
||||
### sparse\_hashed {#dicts-external_dicts_dict_layout-sparse_hashed}
|
||||
### sparse_hashed {#dicts-external_dicts_dict_layout-sparse_hashed}
|
||||
|
||||
Similar to `hashed`, but uses less memory in favor of more CPU usage.
|
||||
|
||||
It is also preallocated in the same way as `hashed`; note that this is even more significant for `sparse_hashed`.
|
||||
|
||||
Configuration example:
|
||||
|
||||
``` xml
|
||||
@ -123,7 +127,7 @@ Configuration example:
|
||||
LAYOUT(SPARSE_HASHED())
|
||||
```
|
||||
|
||||
### complex\_key\_hashed {#complex-key-hashed}
|
||||
### complex_key_hashed {#complex-key-hashed}
|
||||
|
||||
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `hashed`.
|
||||
|
||||
@ -139,7 +143,7 @@ Configuration example:
|
||||
LAYOUT(COMPLEX_KEY_HASHED())
|
||||
```
|
||||
|
||||
### range\_hashed {#range-hashed}
|
||||
### range_hashed {#range-hashed}
|
||||
|
||||
The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.
|
||||
|
||||
@ -294,11 +298,11 @@ Set a large enough cache size. You need to experiment to select the number of ce
|
||||
!!! warning "Warning"
|
||||
Do not use ClickHouse as a source, because it is slow to process queries with random reads.
|
||||
|
||||
### complex\_key\_cache {#complex-key-cache}
|
||||
### complex_key_cache {#complex-key-cache}
|
||||
|
||||
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `cache`.
|
||||
|
||||
### ssd\_cache {#ssd-cache}
|
||||
### ssd_cache {#ssd-cache}
|
||||
|
||||
Similar to `cache`, but stores data on SSD and index in RAM.
|
||||
|
||||
@ -328,9 +332,9 @@ LAYOUT(CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
|
||||
PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576))
|
||||
```
|
||||
|
||||
### complex\_key\_ssd\_cache {#complex-key-ssd-cache}
|
||||
### complex_key_ssd_cache {#complex-key-ssd-cache}
|
||||
|
||||
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `ssd\_cache`.
|
||||
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `ssd_cache`.
|
||||
|
||||
### direct {#direct}
|
||||
|
||||
@ -354,11 +358,11 @@ or
|
||||
LAYOUT(DIRECT())
|
||||
```
|
||||
|
||||
### complex\_key\_direct {#complex-key-direct}
|
||||
### complex_key_direct {#complex-key-direct}
|
||||
|
||||
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `direct`.
|
||||
|
||||
### ip\_trie {#ip-trie}
|
||||
### ip_trie {#ip-trie}
|
||||
|
||||
This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN.
|
||||
|
||||
|
@ -13,9 +13,9 @@ ClickHouse:
|
||||
- Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically.
|
||||
- Allows to create external dictionaries with xml files or [DDL queries](../../../sql-reference/statements/create/dictionary.md).
|
||||
|
||||
The configuration of external dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries\_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter.
|
||||
The configuration of external dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter.
|
||||
|
||||
Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries\_lazy\_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) setting.
|
||||
Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries_lazy_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) setting.
|
||||
|
||||
The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tables-dictionaries) system table contains information about dictionaries configured at server. For each dictionary you can find there:
|
||||
|
||||
|
@ -1099,7 +1099,7 @@ Result:
|
||||
|
||||
## arrayAUC {#arrayauc}
|
||||
|
||||
Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: https://en.wikipedia.org/wiki/Receiver\_operating\_characteristic\#Area\_under\_the\_curve).
|
||||
Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve).
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -63,7 +63,7 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res
|
||||
|
||||
## bitmapSubsetInRange {#bitmap-functions-bitmapsubsetinrange}
|
||||
|
||||
Return subset in specified range (not include the range\_end).
|
||||
Returns the subset in the specified range (not including range_end).
|
||||
|
||||
``` sql
|
||||
bitmapSubsetInRange(bitmap, range_start, range_end)
|
||||
@ -239,7 +239,7 @@ SELECT bitmapCardinality(bitmapBuild([1, 2, 3, 4, 5])) AS res
|
||||
|
||||
## bitmapMin {#bitmapmin}
|
||||
|
||||
Return the smallest value of type UInt64 in the set, UINT32\_MAX if the set is empty.
|
||||
Return the smallest value of type UInt64 in the set, UINT32_MAX if the set is empty.
|
||||
|
||||
bitmapMin(bitmap)
|
||||
|
||||
@ -286,8 +286,8 @@ Transform an array of values in a bitmap to another array of values, the result
|
||||
**Parameters**
|
||||
|
||||
- `bitmap` – bitmap object.
|
||||
- `from_array` – UInt32 array. For idx in range \[0, from\_array.size()), if bitmap contains from\_array\[idx\], then replace it with to\_array\[idx\]. Note that the result depends on array ordering if there are common elements between from\_array and to\_array.
|
||||
- `to_array` – UInt32 array, its size shall be the same to from\_array.
|
||||
- `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\]. Note that the result depends on array ordering if there are common elements between from_array and to_array.
|
||||
- `to_array` – UInt32 array, its size shall be the same as from_array.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -218,7 +218,7 @@ Rounds down a date with time to the start of the ten-minute interval.
|
||||
|
||||
Rounds down the date with time to the start of the fifteen-minute interval.
|
||||
|
||||
## toStartOfInterval(time\_or\_data, INTERVAL x unit \[, time\_zone\]) {#tostartofintervaltime-or-data-interval-x-unit-time-zone}
|
||||
## toStartOfInterval(time_or_data, INTERVAL x unit \[, time_zone\]) {#tostartofintervaltime-or-data-interval-x-unit-time-zone}
|
||||
|
||||
This is a generalization of other functions named `toStartOf*`. For example,
|
||||
`toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`,
|
||||
@ -339,7 +339,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
|
||||
└────────────┴───────────┴───────────┴───────────┘
|
||||
```
|
||||
|
||||
## date_trunc(datepart, time\_or\_data\[, time\_zone\]), dateTrunc(datepart, time\_or\_data\[, time\_zone\]) {#date_trunc}
|
||||
## date_trunc(datepart, time_or_data\[, time_zone\]), dateTrunc(datepart, time_or_data\[, time_zone\]) {#date_trunc}
|
||||
|
||||
Truncates a date or date with time based on the specified datepart, such as
|
||||
- `second`
|
||||
@ -486,20 +486,32 @@ For a time interval starting at ‘StartTime’ and continuing for ‘Duration
|
||||
For example, `timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]`.
|
||||
This is necessary for searching for pageviews in the corresponding session.
|
||||
|
||||
## formatDateTime(Time, Format\[, Timezone\]) {#formatdatetime}
|
||||
## formatDateTime {#formatdatetime}
|
||||
|
||||
Function formats a Time according to the given Format string. N.B.: Format is a constant expression, e.g. you cannot have multiple formats for a single result column.
|
||||
|
||||
Supported modifiers for Format:
|
||||
(“Example” column shows formatting result for time `2018-01-02 22:33:44`)
|
||||
**Syntax**
|
||||
|
||||
| Modifier | Description | Example |
|
||||
``` sql
|
||||
formatDateTime(Time, Format[, Timezone])
|
||||
```
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
Returns time and date values according to the determined format.
|
||||
|
||||
**Replacement fields**
|
||||
Using replacement fields, you can define a pattern for the resulting string. “Example” column shows formatting result for `2018-01-02 22:33:44`.
|
||||
|
||||
| Placeholder | Description | Example |
|
||||
|----------|---------------------------------------------------------|------------|
|
||||
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
|
||||
| %d | day of the month, zero-padded (01-31) | 02 |
|
||||
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
|
||||
| %e | day of the month, space-padded ( 1-31) | 2 |
|
||||
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
|
||||
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
|
||||
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
|
||||
| %H | hour in 24h format (00-23) | 22 |
|
||||
| %I | hour in 12h format (01-12) | 10 |
|
||||
| %j | day of the year (001-366) | 002 |
|
||||
@ -518,6 +530,22 @@ Supported modifiers for Format:
|
||||
| %Y | Year | 2018 |
|
||||
| %% | a % sign | % |
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT formatDateTime(toDate('2010-01-04'), '%g')
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```
|
||||
┌─formatDateTime(toDate('2010-01-04'), '%g')─┐
|
||||
│ 10 │
|
||||
└────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) <!--hide-->
|
||||
|
||||
## FROM_UNIXTIME
|
||||
|
@ -315,7 +315,7 @@ Calculates JumpConsistentHash form a UInt64.
|
||||
Accepts two arguments: a UInt64-type key and the number of buckets. Returns Int32.
|
||||
For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf)
|
||||
|
||||
## murmurHash2\_32, murmurHash2\_64 {#murmurhash2-32-murmurhash2-64}
|
||||
## murmurHash2_32, murmurHash2_64 {#murmurhash2-32-murmurhash2-64}
|
||||
|
||||
Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.
|
||||
|
||||
@ -383,7 +383,7 @@ Result:
|
||||
└──────────────────────┴─────────────────────┘
|
||||
```
|
||||
|
||||
## murmurHash3\_32, murmurHash3\_64 {#murmurhash3-32-murmurhash3-64}
|
||||
## murmurHash3_32, murmurHash3_64 {#murmurhash3-32-murmurhash3-64}
|
||||
|
||||
Produces a [MurmurHash3](https://github.com/aappleby/smhasher) hash value.
|
||||
|
||||
@ -413,7 +413,7 @@ SELECT murmurHash3_32(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:
|
||||
└─────────────┴────────┘
|
||||
```
|
||||
|
||||
## murmurHash3\_128 {#murmurhash3-128}
|
||||
## murmurHash3_128 {#murmurhash3-128}
|
||||
|
||||
Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash value.
|
||||
|
||||
|
@ -14,11 +14,11 @@ For proper operation of introspection functions:
|
||||
|
||||
- Install the `clickhouse-common-static-dbg` package.
|
||||
|
||||
- Set the [allow\_introspection\_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1.
|
||||
- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1.
|
||||
|
||||
For security reasons introspection functions are disabled by default.
|
||||
|
||||
ClickHouse saves profiler reports to the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly.
|
||||
ClickHouse saves profiler reports to the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly.
|
||||
|
||||
## addressToLine {#addresstoline}
|
||||
|
||||
|
@ -73,7 +73,7 @@ SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1
|
||||
SELECT isValidJSON('not a json') = 0
|
||||
```
|
||||
|
||||
## JSONHas(json\[, indices\_or\_keys\]…) {#jsonhasjson-indices-or-keys}
|
||||
## JSONHas(json\[, indices_or_keys\]…) {#jsonhasjson-indices-or-keys}
|
||||
|
||||
If the value exists in the JSON document, `1` will be returned.
|
||||
|
||||
@ -106,7 +106,7 @@ SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a'
|
||||
SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello'
|
||||
```
|
||||
|
||||
## JSONLength(json\[, indices\_or\_keys\]…) {#jsonlengthjson-indices-or-keys}
|
||||
## JSONLength(json\[, indices_or_keys\]…) {#jsonlengthjson-indices-or-keys}
|
||||
|
||||
Return the length of a JSON array or a JSON object.
|
||||
|
||||
@ -119,7 +119,7 @@ SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3
|
||||
SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2
|
||||
```
|
||||
|
||||
## JSONType(json\[, indices\_or\_keys\]…) {#jsontypejson-indices-or-keys}
|
||||
## JSONType(json\[, indices_or_keys\]…) {#jsontypejson-indices-or-keys}
|
||||
|
||||
Return the type of a JSON value.
|
||||
|
||||
@ -133,13 +133,13 @@ SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String'
|
||||
SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array'
|
||||
```
|
||||
|
||||
## JSONExtractUInt(json\[, indices\_or\_keys\]…) {#jsonextractuintjson-indices-or-keys}
|
||||
## JSONExtractUInt(json\[, indices_or_keys\]…) {#jsonextractuintjson-indices-or-keys}
|
||||
|
||||
## JSONExtractInt(json\[, indices\_or\_keys\]…) {#jsonextractintjson-indices-or-keys}
|
||||
## JSONExtractInt(json\[, indices_or_keys\]…) {#jsonextractintjson-indices-or-keys}
|
||||
|
||||
## JSONExtractFloat(json\[, indices\_or\_keys\]…) {#jsonextractfloatjson-indices-or-keys}
|
||||
## JSONExtractFloat(json\[, indices_or_keys\]…) {#jsonextractfloatjson-indices-or-keys}
|
||||
|
||||
## JSONExtractBool(json\[, indices\_or\_keys\]…) {#jsonextractbooljson-indices-or-keys}
|
||||
## JSONExtractBool(json\[, indices_or_keys\]…) {#jsonextractbooljson-indices-or-keys}
|
||||
|
||||
Parses a JSON and extracts a value. These functions are similar to `visitParam` functions.
|
||||
|
||||
@ -153,7 +153,7 @@ SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200
|
||||
SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300
|
||||
```
|
||||
|
||||
## JSONExtractString(json\[, indices\_or\_keys\]…) {#jsonextractstringjson-indices-or-keys}
|
||||
## JSONExtractString(json\[, indices_or_keys\]…) {#jsonextractstringjson-indices-or-keys}
|
||||
|
||||
Parses a JSON and extracts a string. This function is similar to `visitParamExtractString` functions.
|
||||
|
||||
@ -171,7 +171,7 @@ SELECT JSONExtractString('{"abc":"\\u263"}', 'abc') = ''
|
||||
SELECT JSONExtractString('{"abc":"hello}', 'abc') = ''
|
||||
```
|
||||
|
||||
## JSONExtract(json\[, indices\_or\_keys…\], Return\_type) {#jsonextractjson-indices-or-keys-return-type}
|
||||
## JSONExtract(json\[, indices_or_keys…\], Return_type) {#jsonextractjson-indices-or-keys-return-type}
|
||||
|
||||
Parses a JSON and extracts a value of the given ClickHouse data type.
|
||||
|
||||
@ -192,7 +192,7 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday
|
||||
SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday'
|
||||
```
|
||||
|
||||
## JSONExtractKeysAndValues(json\[, indices\_or\_keys…\], Value\_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type}
|
||||
## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type}
|
||||
|
||||
Parses key-value pairs from a JSON where the values are of the given ClickHouse data type.
|
||||
|
||||
@ -202,7 +202,7 @@ Example:
|
||||
SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8') = [('a',5),('b',7),('c',11)]
|
||||
```
|
||||
|
||||
## JSONExtractRaw(json\[, indices\_or\_keys\]…) {#jsonextractrawjson-indices-or-keys}
|
||||
## JSONExtractRaw(json\[, indices_or_keys\]…) {#jsonextractrawjson-indices-or-keys}
|
||||
|
||||
Returns a part of JSON as unparsed string.
|
||||
|
||||
@ -214,7 +214,7 @@ Example:
|
||||
SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'
|
||||
```
|
||||
|
||||
## JSONExtractArrayRaw(json\[, indices\_or\_keys…\]) {#jsonextractarrayrawjson-indices-or-keys}
|
||||
## JSONExtractArrayRaw(json\[, indices_or_keys…\]) {#jsonextractarrayrawjson-indices-or-keys}
|
||||
|
||||
Returns an array with elements of JSON array, each represented as unparsed string.
|
||||
|
||||
|
@ -417,7 +417,7 @@ ORDER BY h ASC
|
||||
Transforms a value according to the explicitly defined mapping of some elements to other ones.
|
||||
There are two variations of this function:
|
||||
|
||||
### transform(x, array\_from, array\_to, default) {#transformx-array-from-array-to-default}
|
||||
### transform(x, array_from, array_to, default) {#transformx-array-from-array-to-default}
|
||||
|
||||
`x` – What to transform.
|
||||
|
||||
@ -437,7 +437,7 @@ Types:
|
||||
Where the same letter is indicated (T or U), for numeric types these might not be matching types, but types that have a common type.
|
||||
For example, the first argument can have the Int64 type, while the second has the Array(UInt16) type.
|
||||
|
||||
If the ‘x’ value is equal to one of the elements in the ‘array\_from’ array, it returns the existing element (that is numbered the same) from the ‘array\_to’ array. Otherwise, it returns ‘default’. If there are multiple matching elements in ‘array\_from’, it returns one of the matches.
|
||||
If the ‘x’ value is equal to one of the elements in the ‘array_from’ array, it returns the existing element (that is numbered the same) from the ‘array_to’ array. Otherwise, it returns ‘default’. If there are multiple matching elements in ‘array_from’, it returns one of the matches.
|
||||
|
||||
Example:
|
||||
|
||||
@ -459,10 +459,10 @@ ORDER BY c DESC
|
||||
└───────────┴────────┘
|
||||
```
|
||||
|
||||
### transform(x, array\_from, array\_to) {#transformx-array-from-array-to}
|
||||
### transform(x, array_from, array_to) {#transformx-array-from-array-to}
|
||||
|
||||
Differs from the first variation in that the ‘default’ argument is omitted.
|
||||
If the ‘x’ value is equal to one of the elements in the ‘array\_from’ array, it returns the matching element (that is numbered the same) from the ‘array\_to’ array. Otherwise, it returns ‘x’.
|
||||
If the ‘x’ value is equal to one of the elements in the ‘array_from’ array, it returns the matching element (that is numbered the same) from the ‘array_to’ array. Otherwise, it returns ‘x’.
|
||||
|
||||
Types:
|
||||
|
||||
@ -538,6 +538,51 @@ SELECT
|
||||
└────────────────┴───────────────────┘
|
||||
```
|
||||
|
||||
## formatReadableTimeDelta {#formatreadabletimedelta}
|
||||
|
||||
Accepts the time delta in seconds. Returns a time delta with (year, month, day, hour, minute, second) as a string.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
formatReadableTimeDelta(column[, maximum_unit])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `column` — A column with numeric time delta.
|
||||
- `maximum_unit` — Optional. Maximum unit to show. Acceptable values: seconds, minutes, hours, days, months, years.
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
arrayJoin([100, 12345, 432546534]) AS elapsed,
|
||||
formatReadableTimeDelta(elapsed) AS time_delta
|
||||
```
|
||||
|
||||
``` text
|
||||
┌────elapsed─┬─time_delta ─────────────────────────────────────────────────────┐
|
||||
│ 100 │ 1 minute and 40 seconds │
|
||||
│ 12345 │ 3 hours, 25 minutes and 45 seconds │
|
||||
│ 432546534 │ 13 years, 8 months, 17 days, 7 hours, 48 minutes and 54 seconds │
|
||||
└────────────┴─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
arrayJoin([100, 12345, 432546534]) AS elapsed,
|
||||
formatReadableTimeDelta(elapsed, 'minutes') AS time_delta
|
||||
```
|
||||
|
||||
``` text
|
||||
┌────elapsed─┬─time_delta ─────────────────────────────────────────────────────┐
|
||||
│ 100 │ 1 minute and 40 seconds │
|
||||
│ 12345 │ 205 minutes and 45 seconds │
|
||||
│ 432546534 │ 7209108 minutes and 54 seconds │
|
||||
└────────────┴─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## least(a, b) {#leasta-b}
|
||||
|
||||
Returns the smallest value from a and b.
|
||||
@ -1248,7 +1293,7 @@ joinGet(join_storage_table_name, `value_column`, join_keys)
|
||||
|
||||
Returns a list of values corresponding to the list of keys.
|
||||
|
||||
If a certain key doesn’t exist in the source table, then `0` or `null` will be returned based on the [join\_use\_nulls](../../operations/settings/settings.md#join_use_nulls) setting.
|
||||
If a certain key doesn’t exist in the source table, then `0` or `null` will be returned based on the [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting.
|
||||
|
||||
More info about `join_use_nulls` in [Join operation](../../engines/table-engines/special/join.md).
|
||||
|
||||
@ -1287,15 +1332,15 @@ Result:
|
||||
└──────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## modelEvaluate(model\_name, …) {#function-modelevaluate}
|
||||
## modelEvaluate(model_name, …) {#function-modelevaluate}
|
||||
|
||||
Evaluate external model.
|
||||
Accepts a model name and model arguments. Returns Float64.
|
||||
|
||||
## throwIf(x\[, custom\_message\]) {#throwifx-custom-message}
|
||||
## throwIf(x\[, custom_message\]) {#throwifx-custom-message}
|
||||
|
||||
Throw an exception if the argument is non zero.
|
||||
custom\_message - is an optional parameter: a constant string, provides an error message
|
||||
custom_message - is an optional parameter: a constant string, provides an error message
|
||||
|
||||
``` sql
|
||||
SELECT throwIf(number = 3, 'Too many') FROM numbers(10);
|
||||
|
@ -32,12 +32,12 @@ The function also works for arrays.
|
||||
Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn’t throw an exception).
|
||||
The result type is UInt64.
|
||||
|
||||
## char\_length, CHAR\_LENGTH {#char-length}
|
||||
## char_length, CHAR_LENGTH {#char-length}
|
||||
|
||||
Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn’t throw an exception).
|
||||
The result type is UInt64.
|
||||
|
||||
## character\_length, CHARACTER\_LENGTH {#character-length}
|
||||
## character_length, CHARACTER_LENGTH {#character-length}
|
||||
|
||||
Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it doesn’t throw an exception).
|
||||
The result type is UInt64.
|
||||
@ -78,7 +78,7 @@ toValidUTF8( input_string )
|
||||
|
||||
Parameters:
|
||||
|
||||
- input\_string — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
|
||||
- input_string — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
|
||||
|
||||
Returned value: Valid UTF-8 string.
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user