Merge branch 'master' into add-more-s3-tests

This commit is contained in:
mergify[bot] 2022-04-19 10:11:08 +00:00 committed by GitHub
commit b5058f9770
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
243 changed files with 1485 additions and 2456 deletions

View File

@ -2,7 +2,7 @@
name: Debug
'on':
[push, pull_request, release]
[push, pull_request, release, workflow_dispatch]
jobs:
DebugInfo:

View File

@ -314,6 +314,15 @@ if (ENABLE_BUILD_PATH_MAPPING)
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
endif ()
# Optional build-time profiling (off by default).
# When enabled and COMPILER_CLANG is set, -ftime-trace is appended to COMPILER_FLAGS.
# Otherwise a message is emitted at the RECONFIGURE_MESSAGE_LEVEL severity and
# no flag is added.
option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF)
if (ENABLE_BUILD_PROFILING)
if (COMPILER_CLANG)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace")
else ()
message (${RECONFIGURE_MESSAGE_LEVEL} "Build profiling is only available with CLang")
endif ()
endif ()
if (${CMAKE_VERSION} VERSION_LESS "3.12.4")
# CMake < 3.12 doesn't support setting 20 as a C++ standard version.
# We will add C++ standard controlling flag in CMAKE_CXX_FLAGS manually for now.

View File

@ -14,8 +14,8 @@
uint64_t getAvailableMemoryAmountOrZero()
{
#if defined(_SC_AVPHYS_PAGES) // linux
return getPageSize() * sysconf(_SC_AVPHYS_PAGES);
#if defined(_SC_PHYS_PAGES) // linux
return getPageSize() * sysconf(_SC_PHYS_PAGES);
#elif defined(__FreeBSD__)
struct vmtotal vmt;
size_t vmt_size = sizeof(vmt);

View File

@ -2,7 +2,7 @@
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/
#include <base/defines.h>

View File

@ -1,6 +1,6 @@
#pragma once
/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/
/** Collects all dl_phdr_info items and caches them in a static array.
* Also rewrites dl_iterate_phdr with a lock-free version which consults the above cache

View File

@ -76,10 +76,10 @@ public:
/// return none if daemon doesn't exist, reference to the daemon otherwise
static std::optional<std::reference_wrapper<BaseDaemon>> tryGetInstance() { return tryGetInstance<BaseDaemon>(); }
/// В Graphite компоненты пути(папки) разделяются точкой.
/// У нас принят путь формата root_path.hostname_yandex_ru.key
/// root_path по умолчанию one_min
/// key - лучше группировать по смыслу. Например "meminfo.cached" или "meminfo.free", "meminfo.total"
/// Graphite metric name has components separated by dots.
/// We used to have the following format: root_path.hostname_clickhouse_com.key
/// root_path - one_min by default
/// key - something that makes sense. Examples: "meminfo.cached" or "meminfo.free", "meminfo.total".
template <class T>
void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
{

2
contrib/sysroot vendored

@ -1 +1 @@
Subproject commit bbcac834526d90d1e764164b861be426891d1743
Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8

18
debian/.gitignore vendored
View File

@ -1,18 +0,0 @@
control
copyright
tmp/
clickhouse-benchmark/
clickhouse-client.docs
clickhouse-client/
clickhouse-common-static-dbg/
clickhouse-common-static.docs
clickhouse-common-static/
clickhouse-server-base/
clickhouse-server-common/
clickhouse-server/
debhelper-build-stamp
files
*.debhelper.log
*.debhelper
*.substvars

223
debian/.pbuilderrc vendored
View File

@ -1,223 +0,0 @@
#
# sudo apt install pbuilder fakeroot debhelper debian-archive-keyring debian-keyring
#
# ubuntu:
# prepare old (trusty or earlier) host system:
# sudo ln -s gutsy /usr/share/debootstrap/scripts/eoan
# sudo ln -s gutsy /usr/share/debootstrap/scripts/disco
# sudo ln -s gutsy /usr/share/debootstrap/scripts/cosmic
# sudo ln -s gutsy /usr/share/debootstrap/scripts/artful
# sudo ln -s gutsy /usr/share/debootstrap/scripts/bionic
# sudo ln -s sid /usr/share/debootstrap/scripts/buster
# build ubuntu:
# sudo DIST=bionic pbuilder create --configfile debian/.pbuilderrc && DIST=bionic pdebuild --configfile debian/.pbuilderrc
# sudo DIST=cosmic pbuilder create --configfile debian/.pbuilderrc && DIST=cosmic pdebuild --configfile debian/.pbuilderrc
# sudo DIST=disco pbuilder create --configfile debian/.pbuilderrc && DIST=disco pdebuild --configfile debian/.pbuilderrc
# sudo DIST=eoan pbuilder create --configfile debian/.pbuilderrc && DIST=eoan pdebuild --configfile debian/.pbuilderrc
# sudo DIST=devel pbuilder create --configfile debian/.pbuilderrc && DIST=devel pdebuild --configfile debian/.pbuilderrc
# build debian:
# sudo DIST=stable pbuilder create --configfile debian/.pbuilderrc && DIST=stable pdebuild --configfile debian/.pbuilderrc
# sudo DIST=testing pbuilder create --configfile debian/.pbuilderrc && DIST=testing pdebuild --configfile debian/.pbuilderrc
# sudo DIST=unstable pbuilder create --configfile debian/.pbuilderrc && DIST=unstable pdebuild --configfile debian/.pbuilderrc
# sudo DIST=experimental pbuilder create --configfile debian/.pbuilderrc && DIST=experimental pdebuild --configfile debian/.pbuilderrc
# build i386 experimental:
# sudo DIST=trusty ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=trusty ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=xenial ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=xenial ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=zesty ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=zesty ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=artful ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=artful ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=bionic ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=bionic ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=stable ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=stable ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=testing ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=testing ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=experimental ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=experimental ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# test gcc-9
# env DEB_CC=gcc-9 DEB_CXX=g++-9 EXTRAPACKAGES="g++-9 gcc-9" DIST=disco pdebuild --configfile debian/.pbuilderrc
# use only clang:
# env DEB_CC=clang-8 DEB_CXX=clang++-8 EXTRAPACKAGES=clang-8 DIST=disco pdebuild --configfile debian/.pbuilderrc
# env DEB_CC=clang-5.0 DEB_CXX=clang++-5.0 EXTRAPACKAGES=clang-5.0 DIST=artful pdebuild --configfile debian/.pbuilderrc
# clang+asan:
# env DEB_CC=clang-5.0 DEB_CXX=clang++-5.0 EXTRAPACKAGES="clang-5.0 libc++abi-dev libc++-dev" CMAKE_FLAGS="-DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DCMAKE_BUILD_TYPE=Asan" DIST=artful pdebuild --configfile debian/.pbuilderrc
# clang+tsan:
# env DEB_CC=clang-5.0 DEB_CXX=clang++-5.0 EXTRAPACKAGES="clang-5.0 libc++abi-dev libc++-dev" CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Tsan" DIST=artful pdebuild --configfile debian/.pbuilderrc
# without sse for old systems and some VM:
# env DH_VERBOSE=1 CMAKE_FLAGS="-DHAVE_SSE41=0 -DHAVE_SSE42=0 -DHAVE_POPCNT=0 -DHAVE_SSE2_INTRIN=0 -DSSE2FLAG=' ' -DHAVE_SSE42_INTRIN=0 -DSSE4FLAG=' ' -DHAVE_PCLMULQDQ_INTRIN=0 -DPCLMULFLAG=' '" DIST=artful pdebuild --configfile debian/.pbuilderrc
# Note: on trusty host creating some future dists can fail (debootstrap error).
# Your packages built here: /var/cache/pbuilder/*-*/result
# from https://wiki.debian.org/PbuilderTricks :
# Codenames for Debian suites according to their alias. Update these when
# needed.
# Codenames that currently stand behind the "unstable", "testing" and "stable" aliases.
UNSTABLE_CODENAME="sid"
TESTING_CODENAME="buster"
STABLE_CODENAME="stretch"
# The backports suite name is derived from the stable codename.
STABLE_BACKPORTS_SUITE="$STABLE_CODENAME-backports"
# List of Debian suites.
DEBIAN_SUITES=($UNSTABLE_CODENAME $TESTING_CODENAME $STABLE_CODENAME $STABLE_BACKPORTS_SUITE
"experimental" "unstable" "testing" "stable")
# List of Ubuntu suites. Update these when needed.
UBUNTU_SUITES=("eoan" "disco" "cosmic" "bionic" "artful" "zesty" "xenial" "trusty" "devel")
# Set a default distribution if none is used. Note that you can set your own default (i.e. ${DIST:="unstable"}).
# HOST_DIST is the codename lsb_release reports for the machine this config is evaluated on;
# DIST falls back to it only when DIST is unset or empty.
HOST_DIST=`lsb_release --short --codename`
: ${DIST:="$HOST_DIST"}
# Optionally change Debian codenames in $DIST to their aliases.
# Any other value of DIST (including all Ubuntu suites) is left untouched.
case "$DIST" in
$UNSTABLE_CODENAME)
DIST="unstable"
;;
$TESTING_CODENAME)
DIST="testing"
;;
$STABLE_CODENAME)
DIST="stable"
;;
esac
# Optionally set the architecture to the host architecture if none set. Note
# that you can set your own default (i.e. ${ARCH:="i386"}).
: ${ARCH:="$(dpkg --print-architecture)"}
# NAME is "<dist>" or "<dist>-<arch>"; it is used further down to build the
# per-environment paths under /var/cache/pbuilder.
NAME="$DIST"
if [ -n "${ARCH}" ]; then
NAME="$NAME-$ARCH"
# Prepend "--arch <arch>" to whatever debootstrap options were already set.
DEBOOTSTRAPOPTS=("--arch" "$ARCH" "${DEBOOTSTRAPOPTS[@]}")
fi
# Base tarball location: SET_BASETGZ wins when it is non-empty, otherwise a
# per-NAME default under /var/cache/pbuilder is used.
BASETGZ=${SET_BASETGZ}
BASETGZ=${BASETGZ:="/var/cache/pbuilder/$NAME-base.tgz"}
DISTRIBUTION="$DIST"
# Build result directory: same override scheme, via SET_BUILDRESULT.
BUILDRESULT=${SET_BUILDRESULT}
BUILDRESULT=${BUILDRESULT:="/var/cache/pbuilder/$NAME/result/"}
APTCACHE="/var/cache/pbuilder/$NAME/aptcache/"
BUILDPLACE="/var/cache/pbuilder/build/"
# SET_ALLOWUNTRUSTED, when non-empty, takes precedence over an existing ALLOWUNTRUSTED.
ALLOWUNTRUSTED=${SET_ALLOWUNTRUSTED:=${ALLOWUNTRUSTED}}
#DEBOOTSTRAPOPTS=( '--variant=buildd' $SET_DEBOOTSTRAPOPTS )
# Pick the Debian or Ubuntu configuration. Membership is tested by grepping
# $DIST (as a pattern) in the space-joined suite list, so the branch taken
# depends on grep's exit status.
if $(echo ${DEBIAN_SUITES[@]} | grep -q $DIST); then
# Debian configuration
OSNAME=debian
# SET_MIRRORSITE is used when already set; the default applies only when it is unset.
MIRRORSITE=${SET_MIRRORSITE="http://deb.debian.org/$OSNAME/"}
COMPONENTS="main contrib non-free"
# For the stable distribution also add the backports suite as an extra mirror.
if $(echo "$STABLE_CODENAME stable" | grep -q $DIST); then
OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $STABLE_BACKPORTS_SUITE $COMPONENTS"
fi
# APTKEYRINGS=/usr/share/keyrings/debian-archive-keyring.gpg
# On a trusty host skip the GPG check in debootstrap; everywhere else point
# debootstrap at the Debian archive keyring.
case "$HOST_DIST" in
"trusty" )
DEBOOTSTRAPOPTS+=( '--no-check-gpg' )
;;
*)
DEBOOTSTRAPOPTS+=( '--keyring' '/usr/share/keyrings/debian-archive-keyring.gpg' )
# DEBOOTSTRAPOPTS+=( '--keyring' '/usr/share/keyrings/debian-keyring.gpg' )
esac
elif $(echo ${UBUNTU_SUITES[@]} | grep -q $DIST); then
# Ubuntu configuration
OSNAME=ubuntu
# amd64 and i386 use the main archive; every other architecture uses ubuntu-ports.
if [[ "$ARCH" == "amd64" || "$ARCH" == "i386" ]]; then
MIRRORSITE=${SET_MIRRORSITE="http://archive.ubuntu.com/$OSNAME/"}
else
MIRRORSITE=${SET_MIRRORSITE="http://ports.ubuntu.com/ubuntu-ports/"}
fi
COMPONENTS="main restricted universe multiverse"
# Add the -updates, -security and -proposed pockets as extra mirrors.
OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $DIST-updates main restricted universe multiverse"
OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $DIST-security main restricted universe multiverse"
OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $DIST-proposed main restricted universe multiverse"
# trusty and xenial additionally get the ubuntu-toolchain-r test PPA, and
# untrusted packages are allowed for them.
case "$DIST" in
"trusty" | "xenial" )
OTHERMIRROR="$OTHERMIRROR | deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/$OSNAME $DIST main"
ALLOWUNTRUSTED=yes
;;
esac
# deb http://apt.llvm.org/zesty/ llvm-toolchain-zesty-5.0 main
else
# DIST matched neither suite list: report it and stop.
echo "Unknown distribution: $DIST"
exit 1
fi
# Print the effective configuration.
echo "using $NAME $OSNAME $DIST $ARCH $LOGNAME $MIRRORSITE"
# Enable the ccache directory for every distribution except trusty.
# SET_CCACHEDIR overrides the default location when it is non-empty.
case "$DIST" in
"trusty")
# ccache broken
;;
*)
CCACHEDIR=${SET_CCACHEDIR:="/var/cache/pbuilder/ccache"}
;;
esac
# old systems with default gcc <= 6
case "$DIST" in
"trusty" | "xenial" | "stable" )
export DEB_CC=gcc-7
export DEB_CXX=g++-7
;;
esac
# Select the LLVM development packages (and matching -DLLVM_VERSION) per
# distribution. On arm64 no LLVM packages are added and the embedded compiler
# is switched off via CMAKE_FLAGS instead.
if [ "$ARCH" != arm64 ]; then
case "$DIST" in
# TODO: fix llvm-8 and use for "disco" and "eoan"
"experimental")
EXTRAPACKAGES+=" liblld-8-dev libclang-8-dev llvm-8-dev liblld-8 "
export CMAKE_FLAGS="-DLLVM_VERSION=8 $CMAKE_FLAGS"
;;
"eoan" | "disco" | "cosmic" | "testing" | "unstable")
EXTRAPACKAGES+=" liblld-7-dev libclang-7-dev llvm-7-dev liblld-7 "
export CMAKE_FLAGS="-DLLVM_VERSION=7 $CMAKE_FLAGS"
;;
"bionic")
EXTRAPACKAGES+=" liblld-6.0-dev libclang-6.0-dev liblld-6.0 "
export CMAKE_FLAGS="-DLLVM_VERSION=6 $CMAKE_FLAGS"
;;
"artful" )
EXTRAPACKAGES+=" liblld-5.0-dev libclang-5.0-dev liblld-5.0 "
;;
esac
else
export CMAKE_FLAGS="-DENABLE_EMBEDDED_COMPILER=0 $CMAKE_FLAGS"
fi
# Will test symbols
#EXTRAPACKAGES+=" gdb "
# For killall in pbuilder-hooks:
EXTRAPACKAGES+=" psmisc "
# With CCACHE_PREFIX=distcc: install distcc, enable network access and export DISTCC_DIR.
[[ $CCACHE_PREFIX == 'distcc' ]] && EXTRAPACKAGES+=" $CCACHE_PREFIX " && USENETWORK=yes && export DISTCC_DIR=/var/cache/pbuilder/distcc
# i386 builds additionally install libssl-dev.
[[ $ARCH == 'i386' ]] && EXTRAPACKAGES+=" libssl-dev "
# Build with as many parallel jobs as nproc reports.
export DEB_BUILD_OPTIONS=parallel=`nproc`
# Floating bug with permissions:
# (only when CCACHEDIR is set) create the directory and make it accessible to
# everyone; a chmod failure is ignored.
[ -n "$CCACHEDIR" ] && sudo mkdir -p $CCACHEDIR
[ -n "$CCACHEDIR" ] && sudo chmod -R a+rwx $CCACHEDIR || true
# chown -R $BUILDUSERID:$BUILDUSERID $CCACHEDIR
# Do not create source package inside pbuilder (-b)
# Use current dir to make package (by default should have src archive)
# echo "3.0 (native)" > debian/source/format
# OR
# pdebuild -b --debbuildopts "--source-option=--format=\"3.0 (native)\""
# OR
DEBBUILDOPTS="-b --source-option=--format=\"3.0 (native)\""
# Hook scripts are taken from the source tree.
HOOKDIR="debian/pbuilder-hooks"
#echo "DEBOOTSTRAPOPTS=${DEBOOTSTRAPOPTS[@]}"
#echo "ALLOWUNTRUSTED=${ALLOWUNTRUSTED} OTHERMIRROR=${OTHERMIRROR}"
#echo "EXTRAPACKAGES=${EXTRAPACKAGES}"

5
debian/changelog vendored
View File

@ -1,5 +0,0 @@
clickhouse (22.1.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 09 Dec 2021 00:32:58 +0300

5
debian/changelog.in vendored
View File

@ -1,5 +0,0 @@
clickhouse (@VERSION_STRING@) unstable; urgency=low
* Modified source code
-- @AUTHOR@ <@EMAIL@> @DATE@

View File

@ -1,7 +0,0 @@
usr/bin/clickhouse-client
usr/bin/clickhouse-local
usr/bin/clickhouse-compressor
usr/bin/clickhouse-benchmark
usr/bin/clickhouse-format
usr/bin/clickhouse-obfuscator
etc/clickhouse-client/config.xml

View File

@ -1,5 +0,0 @@
usr/bin/clickhouse
usr/bin/clickhouse-odbc-bridge
usr/bin/clickhouse-library-bridge
usr/bin/clickhouse-extract-from-config
usr/share/bash-completion/completions

View File

@ -1 +0,0 @@
#*/10 * * * * root ((which service > /dev/null 2>&1 && (service clickhouse-server condstart ||:)) || /etc/init.d/clickhouse-server condstart) > /dev/null 2>&1

View File

@ -1,4 +0,0 @@
LICENSE
AUTHORS
README.md
CHANGELOG.md

View File

@ -1,227 +0,0 @@
#!/bin/sh
### BEGIN INIT INFO
# Provides: clickhouse-server
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Should-Start: $time $network
# Should-Stop: $network
# Short-Description: clickhouse-server daemon
### END INIT INFO
#
# NOTES:
# - Should-* -- script can start if the listed facilities are missing, unlike Required-*
#
# For the documentation [1]:
#
# [1]: https://wiki.debian.org/LSBInitScripts
# Default settings of the init script. All of them can be overridden from
# /etc/default/clickhouse, which is sourced at the end of this section.
CLICKHOUSE_USER=clickhouse
CLICKHOUSE_GROUP=${CLICKHOUSE_USER}
# Shell passed to `su -s` in check_config.
SHELL=/bin/bash
PROGRAM=clickhouse-server
CLICKHOUSE_GENERIC_PROGRAM=clickhouse
CLICKHOUSE_PROGRAM_ENV=""
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFDIR=/etc/$PROGRAM
CLICKHOUSE_LOGDIR=/var/log/clickhouse-server
CLICKHOUSE_LOGDIR_USER=root
CLICKHOUSE_DATADIR=/var/lib/clickhouse
# Directory for the lock file: /var/lock when it exists, /run/lock otherwise
# (created below if it is missing).
if [ -d "/var/lock" ]; then
LOCALSTATEDIR=/var/lock
else
LOCALSTATEDIR=/run/lock
fi
if [ ! -d "$LOCALSTATEDIR" ]; then
mkdir -p "$LOCALSTATEDIR"
fi
CLICKHOUSE_BINDIR=/usr/bin
# Cron file toggled by enable_cron/disable_cron; an empty value disables cron handling (see use_cron).
CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
LOCKFILE=$LOCALSTATEDIR/$PROGRAM
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
CLICKHOUSE_PIDFILE="$CLICKHOUSE_PIDDIR/$PROGRAM.pid"
# CLICKHOUSE_STOP_TIMEOUT=60 # Disabled by default. Place to /etc/default/clickhouse if you need.
# Some systems lack "flock"
# FLOCK is assigned only when the flock command is found.
command -v flock >/dev/null && FLOCK=flock
# Override defaults from optional config file
test -f /etc/default/clickhouse && . /etc/default/clickhouse
# Print an error message to stderr and terminate the script with status 1.
#   $1 - message text
die()
{
    # The argument is quoted so the message is printed verbatim: unquoted, the
    # shell would split it on whitespace (collapsing runs of spaces) and expand
    # any glob characters it contains against the current directory.
    echo "$1" >&2
    exit 1
}
# Check that configuration file is Ok.
# Runs the extract-from-config tool as ${CLICKHOUSE_USER} (through `su -s $SHELL`)
# asking for the "path" key of ${CLICKHOUSE_CONFIG}; the tool's stdout is discarded.
# If the tool exits non-zero, the script is aborted through die().
# When the tool is not present/executable in ${CLICKHOUSE_BINDIR} the check is
# silently skipped.
# NOTE(review): nothing in the visible part of this script calls check_config.
check_config()
{
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure.";
fi
}
# Run `clickhouse install` with the user, PID directory, configuration
# directory and binary directory configured for this script.
initdb()
{
    ${CLICKHOUSE_GENERIC_PROGRAM} install \
        --user "${CLICKHOUSE_USER}" \
        --pid-path "${CLICKHOUSE_PIDDIR}" \
        --config-path "${CLICKHOUSE_CONFDIR}" \
        --binary-path "${CLICKHOUSE_BINDIR}"
}
# Run `clickhouse start` with the user, PID directory, configuration directory
# and binary directory configured for this script.
start()
{
    ${CLICKHOUSE_GENERIC_PROGRAM} start \
        --user "${CLICKHOUSE_USER}" \
        --pid-path "${CLICKHOUSE_PIDDIR}" \
        --config-path "${CLICKHOUSE_CONFDIR}" \
        --binary-path "${CLICKHOUSE_BINDIR}"
}
# Run `clickhouse stop` against the PID directory configured for this script.
stop()
{
    ${CLICKHOUSE_GENERIC_PROGRAM} stop \
        --pid-path "${CLICKHOUSE_PIDDIR}"
}
# Run `clickhouse restart` with the user, PID directory, configuration
# directory and binary directory configured for this script.
restart()
{
    ${CLICKHOUSE_GENERIC_PROGRAM} restart \
        --user "${CLICKHOUSE_USER}" \
        --pid-path "${CLICKHOUSE_PIDDIR}" \
        --config-path "${CLICKHOUSE_CONFDIR}" \
        --binary-path "${CLICKHOUSE_BINDIR}"
}
# Run `clickhouse stop --force` against the PID directory configured for this script.
forcestop()
{
    ${CLICKHOUSE_GENERIC_PROGRAM} stop --force \
        --pid-path "${CLICKHOUSE_PIDDIR}"
}
# Perform action $1 (e.g. start, stop, restart).
# If systemctl is installed, the clickhouse-server unit file exists and systemd
# is running, the action is delegated to `systemctl $1 $PROGRAM`; otherwise the
# function of this script named $1 is called. The exit status is that of
# whichever command was run.
service_or_func()
{
    if ! { [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; }; then
        # No usable systemd: call the local function and pass its status on.
        $1
        return
    fi
    systemctl $1 $PROGRAM
}
# Forcibly stop the server, then start it again.
# The stop always goes through the local forcestop function; the start goes
# through service_or_func so that systemctl is used when systemd is active.
forcerestart()
{
forcestop
# Should not use 'start' function if systemd active
service_or_func start
}
# Tell whether this script should manage the cron file.
# Returns:
#   0 - cron file should be managed
#   1 - systemd is running and the clickhouse-server unit is installed
#   2 - CLICKHOUSE_CRONFILE is empty (cron handling disabled by config)
use_cron()
{
    if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
        # 1. running systemd
        return 1
    elif [ -z "$CLICKHOUSE_CRONFILE" ]; then
        # 2. disabled by config
        return 2
    else
        return 0
    fi
}
# Enable the cron job by stripping all leading '#' characters from every line
# of $CLICKHOUSE_CRONFILE.
# When use_cron reports that cron must not be managed (e.g. with systemd), the
# file is left untouched and use_cron's non-zero status is returned.
enable_cron()
{
    # A bare `return` hands back the status of the failed use_cron call.
    use_cron || return
    sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE"
}
# Disable the cron job by making every line of $CLICKHOUSE_CRONFILE start with
# exactly one '#' (any existing run of leading '#' is replaced by a single one).
# When use_cron reports that cron must not be managed (e.g. with systemd), the
# file is left untouched and use_cron's non-zero status is returned.
disable_cron()
{
    # A bare `return` hands back the status of the failed use_cron call.
    use_cron || return
    sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE"
}
# Succeed (status 0) when the cron job is disabled: either cron is not managed
# at all according to use_cron, or the cron file contains a commented-out line.
is_cron_disabled()
{
    if use_cron; then
        # Assumes that either no lines are commented or all lines are commented.
        # Also please note that currently the cron file for ClickHouse has only
        # one line (but some time ago there were more).
        grep -q -E '^#' "$CLICKHOUSE_CRONFILE"
    else
        return 0
    fi
}
# Dispatch the init-script command given in $1.
#
# start/restart/forcerestart enable the cron job after the action succeeds,
# stop/forcestop disable it before acting, and the cond* variants and reload
# perform the action without touching the cron file. reload and condreload are
# implemented as a restart.
#
# Exit status: 2 (after printing usage) for an unknown command; otherwise
# $EXIT_STATUS, which is initialised to 0 and not modified anywhere in this
# function, so failures of the individual actions are not reflected in it.
# NOTE(review): the usage text lists "status", which is handled before main is
# called, and omits enable_cron/disable_cron, which are accepted here.
main()
{
# See how we were called.
EXIT_STATUS=0
case "$1" in
start)
service_or_func start && enable_cron
;;
stop)
disable_cron
service_or_func stop
;;
restart)
service_or_func restart && enable_cron
;;
forcestop)
disable_cron
forcestop
;;
forcerestart)
forcerestart && enable_cron
;;
reload)
service_or_func restart
;;
condstart)
service_or_func start
;;
condstop)
service_or_func stop
;;
condrestart)
service_or_func restart
;;
condreload)
service_or_func restart
;;
initdb)
initdb
;;
enable_cron)
enable_cron
;;
disable_cron)
disable_cron
;;
*)
echo "Usage: $0 {start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}"
exit 2
;;
esac
exit $EXIT_STATUS
}
# Run `clickhouse status` against the PID directory configured for this script.
status()
{
    ${CLICKHOUSE_GENERIC_PROGRAM} status \
        --pid-path "${CLICKHOUSE_PIDDIR}"
}
# Running commands without need of locking
case "$1" in
    status)
        status
        exit 0
        ;;
esac

# Every other command runs in a subshell that has $LOCKFILE open on file
# descriptor 9, so that concurrent invocations of this script can be detected.
(
    # FLOCK is empty on systems that lack flock(1). In that case run without
    # locking: an unguarded `$FLOCK -n 9` would expand to the command `-n 9`,
    # which always fails, so every command would be refused with
    # "Init script is already running".
    if [ -z "$FLOCK" ] || $FLOCK -n 9; then
        main "$@"
    else
        echo "Init script is already running" && exit 1
    fi
) 9> $LOCKFILE

View File

@ -1,6 +0,0 @@
usr/bin/clickhouse-server
usr/bin/clickhouse-copier
usr/bin/clickhouse-report
etc/clickhouse-server/config.xml
etc/clickhouse-server/users.xml
etc/systemd/system/clickhouse-server.service

View File

@ -1,47 +0,0 @@
#!/bin/sh
# Package post-installation script for clickhouse-server.
# Abort on the first failing command.
set -e
# set -x
PROGRAM=clickhouse-server
# Each setting below keeps a value already present in the environment and
# falls back to the default shown otherwise.
CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse}
CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}}
# Please note that we don't support paths with whitespaces. This is rather ignorant.
CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR:=/etc/clickhouse-server}
CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse}
CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server}
CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
# Source the debconf shell module and the optional local overrides when present.
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
[ -f /etc/default/clickhouse ] && . /etc/default/clickhouse
# Without /etc/debian_version the system is treated as non-Debian, and the
# installation steps below run regardless of the argument passed in $1.
if [ ! -f "/etc/debian_version" ]; then
not_deb_os=1
fi
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
# Let the clickhouse binary perform the installation with the configured user,
# group and paths.
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
# With systemd running and the unit file installed: register the service with systemd.
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
# if old rc.d service present - remove it
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server remove
fi
/bin/systemctl daemon-reload
/bin/systemctl enable clickhouse-server
else
# If you are downgrading to a version older than 1.1.54336, run: systemctl disable clickhouse-server
# Without systemd: register the SysV init script through update-rc.d when available.
if [ -x "/etc/init.d/clickhouse-server" ]; then
if [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
else
echo # Other OS
fi
fi
fi
fi

View File

@ -1,27 +0,0 @@
[Unit]
Description=ClickHouse Server (analytic DBMS for big data)
Requires=network-online.target
# NOTE: After/Wants=time-sync.target alone is not enough: you need to ensure
# that the time has already been adjusted. If you use systemd-timesyncd you are
# safe, but if you use ntp or some other daemon, you should configure it
# additionally.
After=time-sync.target network-online.target
Wants=time-sync.target
[Service]
Type=simple
User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).
WantedBy=multi-user.target

1
debian/compat vendored
View File

@ -1 +0,0 @@
10

58
debian/control vendored
View File

@ -1,58 +0,0 @@
Source: clickhouse
Section: database
Priority: optional
Maintainer: Alexey Milovidov <milovidov@clickhouse.com>
Build-Depends: debhelper (>= 9),
cmake | cmake3,
ninja-build,
clang-13,
llvm-13,
lld-13,
libc6-dev,
tzdata
Standards-Version: 3.9.8
Package: clickhouse-client
Architecture: all
Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version})
Replaces: clickhouse-compressor
Conflicts: clickhouse-compressor
Description: Client binary for ClickHouse
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
.
This package provides clickhouse-client , clickhouse-local and clickhouse-benchmark
Package: clickhouse-common-static
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}
Suggests: clickhouse-common-static-dbg
Replaces: clickhouse-common, clickhouse-server-base
Provides: clickhouse-common, clickhouse-server-base
Description: Common files for ClickHouse
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
.
This package provides common files for both clickhouse server and client
Package: clickhouse-server
Architecture: all
Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser
Recommends: libcap2-bin
Replaces: clickhouse-server-common, clickhouse-server-base
Provides: clickhouse-server-common
Description: Server binary for ClickHouse
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
.
This package provides clickhouse common configuration files
Package: clickhouse-common-static-dbg
Architecture: any
Section: debug
Priority: optional
Depends: ${misc:Depends}
Replaces: clickhouse-common-dbg
Conflicts: clickhouse-common-dbg
Description: debugging symbols for clickhouse-common-static
This package contains the debugging symbols for clickhouse-common.

132
debian/rules vendored
View File

@ -1,132 +0,0 @@
#!/usr/bin/make -f
# -*- makefile -*-
# Uncomment this to turn on verbose mode.
export DH_VERBOSE=1
# -pie only for static mode
export DEB_BUILD_MAINT_OPTIONS=hardening=-all
# because copy_headers.sh has a hardcoded path to build/include_directories.txt
BUILDDIR = obj-$(DEB_HOST_GNU_TYPE)
CURDIR = $(shell pwd)
# Staging directory that the install step populates.
DESTDIR = $(CURDIR)/debian/tmp
DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH)
# Number of parallel jobs: ask distcc when it is the ccache prefix; if the
# count is still empty, use the first of nproc, /proc/cpuinfo or sysctl that
# works, falling back to 4.
ifeq ($(CCACHE_PREFIX),distcc)
THREADS_COUNT=$(shell distcc -j)
endif
ifeq ($(THREADS_COUNT),)
THREADS_COUNT=$(shell nproc || grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu || echo 4)
endif
DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT)
# Without ENABLE_TESTS the tests are not built at all; with it, the tests are
# built but neither run (nocheck) nor stripped (nostrip) as part of the package build.
ifndef ENABLE_TESTS
CMAKE_FLAGS += -DENABLE_TESTS=0
else
# To export binaries and from deb build we do not strip them. No need to run tests in deb build as we run them in CI
DEB_BUILD_OPTIONS+= nocheck
DEB_BUILD_OPTIONS+= nostrip
endif
# Build target used by override_dh_auto_build unless the caller sets MAKE_TARGET.
ifndef MAKE_TARGET
MAKE_TARGET = clickhouse-bundle
endif
CMAKE_FLAGS += -DENABLE_UTILS=0
# Default compilers: the first of gcc-11, gcc-10, gcc-9, gcc (and the matching
# g++ list) that `which` finds, unless DEB_CC / DEB_CXX are already set.
DEB_CC ?= $(shell which gcc-11 gcc-10 gcc-9 gcc | head -n1)
DEB_CXX ?= $(shell which g++-11 g++-10 g++-9 g++ | head -n1)
# Derive CC/CXX from DEB_CC/DEB_CXX. For a native build, or when the C++
# compiler name contains "clang", they are used as is; for any other cross
# build they are prefixed with the host GNU triplet.
ifdef DEB_CXX
DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
DEB_HOST_GNU_TYPE := $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
ifeq ($(DEB_BUILD_GNU_TYPE),$(DEB_HOST_GNU_TYPE))
CC := $(DEB_CC)
CXX := $(DEB_CXX)
else ifeq (clang,$(findstring clang,$(DEB_CXX)))
# If we crosscompile with clang, it knows what to do
CC := $(DEB_CC)
CXX := $(DEB_CXX)
else
CC := $(DEB_HOST_GNU_TYPE)-$(DEB_CC)
CXX := $(DEB_HOST_GNU_TYPE)-$(DEB_CXX)
endif
endif
# Pass the selected compilers to CMake by full path (resolved by `which` when
# the flags are used in a shell command).
ifdef CXX
CMAKE_FLAGS += -DCMAKE_CXX_COMPILER=`which $(CXX)`
endif
ifdef CC
CMAKE_FLAGS += -DCMAKE_C_COMPILER=`which $(CC)`
endif
# Use the Ninja generator when ninja is installed, unless DISABLE_NINJA is set.
# MAKE is then redirected to ninja so the build/install recipes invoke it.
ifndef DISABLE_NINJA
NINJA=$(shell which ninja)
ifneq ($(NINJA),)
CMAKE_FLAGS += -GNinja
export MAKE=$(NINJA) $(NINJA_FLAGS)
endif
endif
ifndef DH_VERBOSE
CMAKE_FLAGS += -DCMAKE_VERBOSE_MAKEFILE=0
endif
# Useful for building on low memory systems
# (setting DISABLE_PARALLEL forces a single job and drops --parallel from dh)
ifndef DISABLE_PARALLEL
DH_FLAGS += --parallel
else
THREADS_COUNT = 1
endif
# Default rule: hand every target to dh with the cmake build system.
%:
dh $@ $(DH_FLAGS) --buildsystem=cmake
# Configure with the CMake flags collected above.
override_dh_auto_configure:
dh_auto_configure -- $(CMAKE_FLAGS)
# Build only $(MAKE_TARGET) inside the build directory.
override_dh_auto_build:
# Fix for ninja. Do not add -O.
$(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)
# Run ctest unless "nocheck" is present in DEB_BUILD_OPTIONS.
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
cd $(BUILDDIR) && ctest -j$(THREADS_COUNT) -V
endif
# Disable config.guess and config.sub update
override_dh_update_autotools_config:
# Remove the files generated by override_dh_install, then run the regular clean.
override_dh_clean:
rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs
dh_clean # -X contrib
# Split debug symbols into clickhouse-common-static-dbg unless "nostrip" is requested.
override_dh_strip:
#https://www.debian.org/doc/debian-policy/ch-source.html#debian-rules-and-deb-build-options
ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS)))
dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg
endif
override_dh_install:
# Making docs
cp LICENSE debian/copyright
ln -sf clickhouse-server.docs debian/clickhouse-client.docs
ln -sf clickhouse-server.docs debian/clickhouse-common-static.docs
# systemd compatibility
mkdir -p $(DESTDIR)/etc/systemd/system/
cp debian/clickhouse-server.service $(DESTDIR)/etc/systemd/system/
dh_install --list-missing --sourcedir=$(DESTDIR)
# Install from the build directory into the staging directory.
override_dh_auto_install:
env DESTDIR=$(DESTDIR) $(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) install
override_dh_shlibdeps:
true # We depend only on libc and dh_shlibdeps gives us wrong (too strict) dependency.
override_dh_builddeb:
dh_builddeb -- -Z gzip # Older systems don't have "xz", so use "gzip" instead.

View File

@ -1 +0,0 @@
3.0 (quilt)

View File

@ -1,9 +0,0 @@
tar-ignore
tar-ignore="build_*/*"
tar-ignore="workspace/*"
tar-ignore="contrib/poco/openssl/*"
tar-ignore="contrib/poco/gradle/*"
tar-ignore="contrib/poco/Data/SQLite/*"
tar-ignore="contrib/poco/PDF/*"
compression-level=3
compression=gzip

6
debian/watch vendored
View File

@ -1,6 +0,0 @@
version=4
opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)-stable\.tar\.gz%clickhouse-$1.tar.gz%" \
https://github.com/ClickHouse/ClickHouse/tags \
(?:.*?/)?v?(\d[\d.]*)-stable\.tar\.gz debian uupdate

View File

@ -362,19 +362,6 @@ function get_profiles
clickhouse-client --port $RIGHT_SERVER_PORT --query "select 1"
}
# For each exported log file (right-/left- variants of addresses, query-log,
# query-thread-log, trace-log, async-metric-log and metric-log) write a
# "<file>.columns" file with the column definitions taken from the file's
# TSVWithNamesAndTypes header: line 1 supplies the names (each wrapped in
# double quotes), line 2 the types; every name is paired with its type,
# separated by a space, and the pairs are joined with commas.
function build_log_column_definitions
{
# FIXME This loop builds column definitions from TSVWithNamesAndTypes in an
# absolutely atrocious way. This should be done by the file() function itself.
for x in {right,left}-{addresses,{query,query-thread,trace,{async-,}metric}-log}.tsv
do
# First sed: header line 1 split into one name per line, then quoted.
# Second sed: header line 2 split into one type per line.
# tr turns the newlines into commas; the final sed drops the trailing comma.
paste -d' ' \
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
done
}
# Build and analyze randomization distribution for all queries.
function analyze_queries
{
@ -382,8 +369,6 @@ rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.t
rm -rf analyze ||:
mkdir analyze analyze/tmp ||:
build_log_column_definitions
# Split the raw test output into files suitable for analysis.
# To debug calculations only for a particular test, substitute a suitable
# wildcard here, e.g. `for test_file in modulo-raw.tsv`.
@ -422,12 +407,10 @@ create table partial_query_times engine File(TSVWithNamesAndTypes,
-- Process queries that were run normally, on both servers.
create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "left-query-log.tsv.columns")');
from file('left-query-log.tsv', TSVWithNamesAndTypes);
create view right_query_log as select *
from file('right-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "right-query-log.tsv.columns")');
from file('right-query-log.tsv', TSVWithNamesAndTypes);
create view query_logs as
select 0 version, query_id, ProfileEvents,
@ -645,8 +628,6 @@ mkdir report report/tmp ||:
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv run-errors.tsv ||:
build_log_column_definitions
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
@ -1028,8 +1009,7 @@ create table unstable_query_runs engine File(TSVWithNamesAndTypes,
;
create view query_log as select *
from file('$version-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-query-log.tsv.columns")');
from file('$version-query-log.tsv', TSVWithNamesAndTypes);
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
@ -1057,8 +1037,7 @@ create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
array join v, n;
create view trace_log as select *
from file('$version-trace-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-trace-log.tsv.columns")');
from file('$version-trace-log.tsv', TSVWithNamesAndTypes);
create view addresses_src as select addr,
-- Some functions change name between builds, e.g. '__clone' or 'clone' or
@ -1067,8 +1046,7 @@ create view addresses_src as select addr,
[name, 'clone.S (filtered by script)', 'pthread_cond_timedwait (filtered by script)']
-- this line is a subscript operator of the above array
[1 + multiSearchFirstIndex(name, ['clone.S', 'pthread_cond_timedwait'])] name
from file('$version-addresses.tsv', TSVWithNamesAndTypes,
'$(cat "$version-addresses.tsv.columns")');
from file('$version-addresses.tsv', TSVWithNamesAndTypes);
create table addresses_join_$version engine Join(any, left, address) as
select addr address, name from addresses_src;
@ -1195,15 +1173,12 @@ done
function report_metrics
{
build_log_column_definitions
rm -rf metrics ||:
mkdir metrics
clickhouse-local --query "
create view right_async_metric_log as
select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes,
'$(cat right-async-metric-log.tsv.columns)')
select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes)
;
-- Use the right log as time reference because it may have higher precision.
@ -1211,8 +1186,7 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as
with (select min(event_time) from right_async_metric_log) as min_time
select metric, r.event_time - min_time event_time, l.value as left, r.value as right
from right_async_metric_log r
asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes,
'$(cat left-async-metric-log.tsv.columns)') l
asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes) l
on l.metric = r.metric and r.event_time <= l.event_time
order by metric, event_time
;

View File

@ -83,15 +83,15 @@ def make_query_command(query):
def prepare_for_hung_check(drop_databases):
# FIXME this function should not exist, but...
# ThreadFuzzer significantly slows down server and causes false-positive hung check failures
call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'")
# We attach gdb to clickhouse-server before running tests
# to print stacktraces of all crashes even if clickhouse cannot print it for some reason.
# However, it obstructs checking for hung queries.
logging.info("Will terminate gdb (if any)")
call_with_retry("kill -TERM $(pidof gdb)")
# ThreadFuzzer significantly slows down server and causes false-positive hung check failures
call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'")
call_with_retry(make_query_command('SELECT 1 FORMAT Null'))
# Some tests execute SYSTEM STOP MERGES or similar queries.

View File

@ -159,6 +159,10 @@ $ clickhouse-client --query "select count(*) from datasets.ontime"
!!! info "Info"
If you will run the queries described below, you have to use the full table name, `datasets.ontime`.
!!! info "Info"
If you are using the prepared partitions or the Online Playground replace any occurrence of `IATA_CODE_Reporting_Airline` or `IATA_CODE_Reporting_Airline AS Carrier` in the following queries with `Carrier` (see `describe ontime`).
## Queries {#queries}
Q0.

View File

@ -124,7 +124,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--time, -t` If specified, print the query execution time to stderr in non-interactive mode.
- `--stacktrace` If specified, also print the stack trace if an exception occurs.
- `--config-file` The name of the configuration file.
- `--secure` If specified, will connect to server over secure connection.
- `--secure` If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).
- `--history_file` — Path to a file containing command history.
- `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters).
- `--hardware-utilization` — Print hardware utilization information in progress bar.
@ -148,7 +148,12 @@ Example of a config file:
<config>
<user>username</user>
<password>password</password>
<secure>False</secure>
<secure>true</secure>
<openSSL>
<client>
<caConfig>/etc/ssl/cert.pem</caConfig>
</client>
</openSSL>
</config>
```

View File

@ -5,11 +5,10 @@ toc_title: HTTP Interface
# HTTP Interface {#http-interface}
The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility.
The HTTP interface lets you use ClickHouse on any platform from any programming language in the form of a REST API. The HTTP interface is more limited than the native interface, but it has better language support.
By default, `clickhouse-server` listens for HTTP on port 8123 (this can be changed in the config).
Sometimes, `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. Please refer this [documentation](https://curl.se/download.html) to install it before running the examples.
HTTPS can be enabled as well with port 8443 by default.
If you make a `GET /` request without parameters, it returns a 200 response code and the string defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response), which defaults to “Ok.” (with a line feed at the end).
@ -18,11 +17,12 @@ $ curl 'http://localhost:8123/'
Ok.
```
Sometimes, the `curl` command is not available on the user's operating system. On Ubuntu or Debian, run `sudo apt install curl`. Please refer to this [documentation](https://curl.se/download.html) to install it before running the examples.
Web UI can be accessed here: `http://localhost:8123/play`.
![Web UI](../images/play.png)
In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. See also `/replicas_status` to check replica's delay.
``` bash
@ -32,7 +32,7 @@ $ curl 'http://localhost:8123/replicas_status'
Ok.
```
Send the request as a URL query parameter, or as a POST. Or send the beginning of the query in the query parameter, and the rest in the POST (well explain later why this is necessary). The size of the URL is limited to 16 KB, so keep this in mind when sending large queries.
Send the request as a URL query parameter, or as a POST. Or send the beginning of the query in the query parameter, and the rest in the POST (we'll explain later why this is necessary). The size of the URL is limited to 1 MiB by default; this can be changed with the `http_max_uri_size` setting.
If successful, you receive the 200 response code and the result in the response body.
If an error occurs, you receive the 500 response code and an error description text in the response body.

View File

@ -28,6 +28,7 @@ toc_title: Adopters
| <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | 1.6 mln events/sec (2018) | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) |
| <a href="https://beeline.ru/" class="favicon">Beeline</a> | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) |
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://betterstack.com/" class="favicon">Better Stack</a> | Cloud, SaaS | Log Management | - | - | [Official Website](https://betterstack.com/logtail) |
| <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
| <a href="https://www.bilibili.com/" class="favicon">BiliBili</a> | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
@ -112,7 +113,7 @@ toc_title: Adopters
| <a href="https://nlmk.com/en/" class="favicon">NLMK</a> | Steel | Monitoring | — | — | [Article in Russian, Jan 2022](https://habr.com/en/company/nlmk/blog/645943/) |
| <a href="https://getnoc.com/" class="favicon">NOC Project</a> | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) |
| <a href="https://www.noction.com" class="favicon">Noction</a> | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability)
| <a href="https://www.ntop.org/" class="favicon">ntop</a> | Network Monitoning | Monitoring | — | — | [Official website, Jan 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) |
| <a href="https://www.ntop.org/" class="favicon">ntop</a> | Network Monitoring | Monitoring | — | — | [Official website, January 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) |
| <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a> | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
| <a href="https://ok.ru" class="favicon">Ok.ru</a> | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) |
| <a href="https://omnicomm.ru/" class="favicon">Omnicomm</a> | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) |
@ -123,6 +124,7 @@ toc_title: Adopters
| <a href="https://panelbear.com/" class="favicon">Panelbear</a> | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) |
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| <a href="https://www.percona.com/" class="favicon">Percona</a> | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
| <a href="https://pingcap.com/" class="favicon">PingCAP</a> | Analytics | Real-Time Transactional and Analytical Processing | - | - | [GitHub, TiFlash/TiDB](https://github.com/pingcap/tiflash) |
| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, October 2020](https://posthog.com/blog/the-posthog-array-1-15-0), [Blog, November 2021](https://posthog.com/blog/how-we-turned-clickhouse-into-our-eventmansion) |
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
@ -159,6 +161,7 @@ toc_title: Adopters
| <a href="https://www.suning.com/" class="favicon">Suning</a> | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) |
| <a href="https://superwall.me/" class="favicon">Superwall</a> | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) |
| <a href="https://swetrix.com" class="favicon">Swetrix</a> | Analytics | Main Product | — | — | [Source code](https://github.com/swetrix/swetrix-api) |
| <a href="https://synpse.net/" class="favicon">Synpse</a> | Application Management | Main Product | - | - | [Tweet, January 2022](https://twitter.com/KRusenas/status/1483571168363880455) |
| <a href="https://www.teralytics.net/" class="favicon">Teralytics</a> | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
@ -172,6 +175,7 @@ toc_title: Adopters
| <a href="https://hello.utmstat.com/" class="favicon">UTMSTAT</a> | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) |
| <a href="https://vercel.com/" class="favicon">Vercel</a> | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 |
| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
| <a href="https://vkontech.com/" class="favicon">VKontech</a> | Distributed Systems | Migrating from MongoDB | - | - | [Blog, January 2022](https://vkontech.com/migrating-your-reporting-queries-from-a-general-purpose-db-mongodb-to-a-data-warehouse-clickhouse-performance-overview/) |
| <a href="https://www.vmware.com/" class="favicon">VMware</a> | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) |
| <a href="https://www.walmartlabs.com/" class="favicon">Walmart Labs</a> | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) |
| <a href="https://wargaming.com/en/" class="favicon">Wargaming</a> | Games | | — | — | [Interview](https://habr.com/en/post/496954/) |
@ -197,5 +201,6 @@ toc_title: Adopters
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://magenta-technology.ru/sistema-upravleniya-marshrutami-inkassacii-as-strela/" class="favicon">АС "Стрела"</a> | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) |
| <a href="https://piwik.pro/" class="favicon">Piwik PRO</a> | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) |
| <a href="https://www.deepglint.com/" class="favicon">Deepglint 格灵深瞳</a> | AI, Computer Vision | OLAP | — | — | [Official Website](https://www.deepglint.com/) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -3,13 +3,10 @@ toc_priority: 66
toc_title: ClickHouse Keeper
---
# [pre-production] ClickHouse Keeper {#clickHouse-keeper}
# ClickHouse Keeper {#clickHouse-keeper}
ClickHouse server uses [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper.
!!! warning "Warning"
This feature is currently in the pre-production stage. We test it in our CI and on small internal installations.
## Implementation details {#implementation-details}
ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, called ZAB (ZooKeeper Atomic Broadcast), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper, ClickHouse Keeper is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows linearizability for reads and writes and has several open-source implementations in different languages.

View File

@ -941,30 +941,30 @@ For more information, see the MergeTreeSettings.h header file.
SSL client/server configuration.
Support for SSL is provided by the `libpoco` library. The interface is described in the file [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h)
Support for SSL is provided by the `libpoco` library. The available configuration options are explained in [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h). Default values can be found in [SSLManager.cpp](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/src/SSLManager.cpp).
Keys for server/client settings:
- privateKeyFile The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time.
- certificateFile The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate.
- caConfig The path to the file or directory that contains trusted root certificates.
- verificationMode The method for checking the nodes certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`.
- verificationDepth The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value.
- loadDefaultCAFile Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \|
- cipherList Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`.
- cacheSessions Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`.
- sessionIdContext A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`.
- sessionCacheSize The maximum number of sessions that the server caches. Default value: 1024\*20. 0 Unlimited sessions.
- sessionTimeout Time for caching the session on the server.
- extendedVerification Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`.
- requireTLSv1 Require a TLSv1 connection. Acceptable values: `true`, `false`.
- requireTLSv1_1 Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
- requireTLSv1_2 Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
- fips Activates OpenSSL FIPS mode. Supported if the librarys OpenSSL version supports FIPS.
- privateKeyPassphraseHandler Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
- invalidCertificateHandler Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>` .
- disableProtocols Protocols that are not allowed to use.
- preferServerCiphers Preferred server ciphers on the client.
- caConfig (default: none) The path to the file or directory that contains trusted CA certificates. If this points to a file, it must be in PEM format and can contain several CA certificates. If this points to a directory, it must contain one .pem file per CA certificate. The filenames are looked up by the CA subject name hash value. Details can be found in the man page of [SSL_CTX_load_verify_locations](https://www.openssl.org/docs/man3.0/man3/SSL_CTX_load_verify_locations.html).
- verificationMode (default: relaxed) The method for checking the node's certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`.
- verificationDepth (default: 9) The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value.
- loadDefaultCAFile (default: true) Whether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that built-in CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in the file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`).
- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) - Supported OpenSSL encryptions.
- cacheSessions (default: false) Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`.
- sessionIdContext (default: `${application.name}`) A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`.
- sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) The maximum number of sessions that the server caches. A value of 0 means unlimited sessions.
- sessionTimeout (default: [2h](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1926)) Time for caching the session on the server.
- extendedVerification (default: false) If enabled, verify that the certificate CN or SAN matches the peer hostname.
- requireTLSv1 (default: false) Require a TLSv1 connection. Acceptable values: `true`, `false`.
- requireTLSv1_1 (default: false) Require a TLSv1.1 connection. Acceptable values: `true`, `false`.
- requireTLSv1_2 (default: false) Require a TLSv1.2 connection. Acceptable values: `true`, `false`.
- fips (default: false) Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS.
- privateKeyPassphraseHandler (default: `KeyConsoleHandler`) Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`.
- invalidCertificateHandler (default: `ConsoleCertificateHandler`) Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>` .
- disableProtocols (default: "") Protocols that are not allowed to be used.
- preferServerCiphers (default: false) Preferred server ciphers on the client.
**Example of settings:**

View File

@ -11,10 +11,6 @@ To work with data stored on `Amazon S3` disks use [S3](../engines/table-engines/
To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
## Zero-copy Replication {#zero-copy}
ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.
## Configuring HDFS {#configuring-hdfs}
[MergeTree](../engines/table-engines/mergetree-family/mergetree.md) and [Log](../engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
@ -316,3 +312,8 @@ When loading files by `endpoint`, they must be loaded into `<endpoint>/store/` p
If URL is not reachable on disk load when the server is starting up tables, then all errors are caught. If in this case there were errors, tables can be reloaded (become visible) via `DETACH TABLE table_name` -> `ATTACH TABLE table_name`. If metadata was successfully loaded at server startup, then tables are available straight away.
Use [http_max_single_read_retries](../operations/settings/settings.md#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
## Zero-copy Replication (not ready for production) {#zero-copy}
ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.

View File

@ -182,7 +182,7 @@ Marks numbers: 0 1 2 3 4 5 6 7 8
Разреженный индекс допускает чтение лишних строк. При чтении одного диапазона первичного ключа, может быть прочитано до `index_granularity * 2` лишних строк в каждом блоке данных.
Разреженный индекс почти всегда помещаеся в оперативную память и позволяет работать с очень большим количеством строк в таблицах.
Разреженный индекс почти всегда помещается в оперативную память и позволяет работать с очень большим количеством строк в таблицах.
ClickHouse не требует уникального первичного ключа. Можно вставить много строк с одинаковым первичным ключом.

View File

@ -5,9 +5,9 @@ toc_title: Nothing
# Nothing {#nothing}
Этот тип данных предназначен только для того, чтобы представлять [NULL](../../../sql-reference/data-types/special-data-types/nothing.md), т.е. отсутствие значения.
Этот тип данных предназначен только для того, чтобы представлять [NULL](../../../sql-reference/syntax.md#null-literal), т.е. отсутствие значения.
Невозможно создать значение типа `Nothing`, поэтому он используется там, где значение не подразумевается. Например, `NULL` записывается как `Nullable(Nothing)` ([Nullable](../../../sql-reference/data-types/special-data-types/nothing.md) — это тип данных, позволяющий хранить `NULL` в таблицах). Также тип `Nothing` используется для обозначения пустых массивов:
Невозможно создать значение типа `Nothing`, поэтому он используется там, где значение не подразумевается. Например, `NULL` записывается как `Nullable(Nothing)` ([Nullable](../../../sql-reference/data-types/nullable.md) — это тип данных, позволяющий хранить `NULL` в таблицах). Также тип `Nothing` используется для обозначения пустых массивов:
``` sql
SELECT toTypeName(Array())

View File

@ -2,7 +2,7 @@
此数据类型的唯一目的是表示不是期望值的情况。 所以不能创建一个 `Nothing` 类型的值。
例如,文本 [NULL](../../../sql-reference/data-types/special-data-types/nothing.md#null-literal) 的类型为 `Nullable(Nothing)`。详情请见 [可为空](../../../sql-reference/data-types/special-data-types/nothing.md)。
例如,字面量 [NULL](../../../sql-reference/syntax.md#null-literal) 的类型为 `Nullable(Nothing)`。详情请见 [可为空](../../../sql-reference/data-types/nullable.md)。
`Nothing` 类型也可以用来表示空数组:

View File

@ -66,40 +66,40 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
using namespace DB;
namespace po = boost::program_options;
po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("help,h", "produce help message")
("input", po::value<std::string>()->value_name("INPUT"), "input file")
("output", po::value<std::string>()->value_name("OUTPUT"), "output file")
("decompress,d", "decompress")
("offset-in-compressed-file", po::value<size_t>()->default_value(0ULL), "offset to the compressed block (i.e. physical file offset)")
("offset-in-decompressed-block", po::value<size_t>()->default_value(0ULL), "offset to the decompressed block (i.e. virtual offset)")
("block-size,b", po::value<unsigned>()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size")
("hc", "use LZ4HC instead of LZ4")
("zstd", "use ZSTD instead of LZ4")
("codec", po::value<std::vector<std::string>>()->multitoken(), "use codecs combination instead of LZ4")
("level", po::value<int>(), "compression level for codecs specified via flags")
("none", "use no compression instead of LZ4")
("stat", "print block statistics of compressed data")
;
po::positional_options_description positional_desc;
positional_desc.add("input", 1);
positional_desc.add("output", 1);
po::variables_map options;
po::store(po::command_line_parser(argc, argv).options(desc).positional(positional_desc).run(), options);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options] < INPUT > OUTPUT" << std::endl;
std::cout << "Usage: " << argv[0] << " [options] INPUT OUTPUT" << std::endl;
std::cout << desc << std::endl;
return 0;
}
try
{
po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("help,h", "produce help message")
("input", po::value<std::string>()->value_name("INPUT"), "input file")
("output", po::value<std::string>()->value_name("OUTPUT"), "output file")
("decompress,d", "decompress")
("offset-in-compressed-file", po::value<size_t>()->default_value(0ULL), "offset to the compressed block (i.e. physical file offset)")
("offset-in-decompressed-block", po::value<size_t>()->default_value(0ULL), "offset to the decompressed block (i.e. virtual offset)")
("block-size,b", po::value<unsigned>()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size")
("hc", "use LZ4HC instead of LZ4")
("zstd", "use ZSTD instead of LZ4")
("codec", po::value<std::vector<std::string>>()->multitoken(), "use codecs combination instead of LZ4")
("level", po::value<int>(), "compression level for codecs specified via flags")
("none", "use no compression instead of LZ4")
("stat", "print block statistics of compressed data")
;
po::positional_options_description positional_desc;
positional_desc.add("input", 1);
positional_desc.add("output", 1);
po::variables_map options;
po::store(po::command_line_parser(argc, argv).options(desc).positional(positional_desc).run(), options);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options] < INPUT > OUTPUT" << std::endl;
std::cout << "Usage: " << argv[0] << " [options] INPUT OUTPUT" << std::endl;
std::cout << desc << std::endl;
return 0;
}
bool decompress = options.count("decompress");
bool use_lz4hc = options.count("hc");
bool use_zstd = options.count("zstd");

View File

@ -44,40 +44,40 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
{
using namespace DB;
boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("query", po::value<std::string>(), "query to format")
("help,h", "produce help message")
("hilite", "add syntax highlight with ANSI terminal escape sequences")
("oneline", "format in single line")
("quiet,q", "just check syntax, no output on success")
("multiquery,n", "allow multiple queries in the same file")
("obfuscate", "obfuscate instead of formatting")
("backslash", "add a backslash at the end of each line of the formatted query")
("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe")
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
;
Settings cmd_settings;
for (const auto & field : cmd_settings.all())
{
if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size")
cmd_settings.addProgramOption(desc, field);
}
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
po::notify(options);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl;
std::cout << desc << std::endl;
return 1;
}
try
{
boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("query", po::value<std::string>(), "query to format")
("help,h", "produce help message")
("hilite", "add syntax highlight with ANSI terminal escape sequences")
("oneline", "format in single line")
("quiet,q", "just check syntax, no output on success")
("multiquery,n", "allow multiple queries in the same file")
("obfuscate", "obfuscate instead of formatting")
("backslash", "add a backslash at the end of each line of the formatted query")
("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe")
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
;
Settings cmd_settings;
for (const auto & field : cmd_settings.all())
{
if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size")
cmd_settings.addProgramOption(desc, field);
}
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
po::notify(options);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl;
std::cout << desc << std::endl;
return 1;
}
bool hilite = options.count("hilite");
bool oneline = options.count("oneline");
bool quiet = options.count("quiet");

View File

@ -1231,5 +1231,5 @@ try
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
throw;
return DB::getCurrentExceptionCode();
}

View File

@ -13,6 +13,8 @@
#include <tuple>
#include <utility> /// pair
#include <fmt/format.h>
#include "config_tools.h"
#include <Common/StringUtils/StringUtils.h>
@ -332,6 +334,20 @@ struct Checker
#endif
;
/// Refuses to continue when the environment contains a variable that alters dynamic loading.
/// If any of the listed variables is set to a non-empty value, a message naming the variable
/// and its value is written to stderr and the process terminates through _exit(1).
void checkHarmfulEnvironmentVariables()
{
    /// The names are a selection from "man ld-linux", plus one variable that is Mac OS X specific.
    /// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
    static constexpr const char * harmful_variables[] =
    {
        "LD_PRELOAD",
        "LD_LIBRARY_PATH",
        "LD_ORIGIN_PATH",
        "LD_AUDIT",
        "LD_DYNAMIC_WEAK",
        "DYLD_INSERT_LIBRARIES",
    };

    for (const char * name : harmful_variables)
    {
        const char * contents = getenv(name);

        /// Unset variables and variables set to an empty string are tolerated.
        if (!contents || !contents[0])
            continue;

        std::cerr << fmt::format("Environment variable {} is set to {}. It can compromise security.\n", name, contents);
        _exit(1);
    }
}
}
@ -352,6 +368,8 @@ int main(int argc_, char ** argv_)
inside_main = true;
SCOPE_EXIT({ inside_main = false; });
checkHarmfulEnvironmentVariables();
/// Reset new handler to default (that throws std::bad_alloc)
/// It is needed because LLVM library clobbers it.
std::set_new_handler(nullptr);

View File

@ -1716,6 +1716,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
return Application::EXIT_OK;
}
void Server::createServers(
Poco::Util::AbstractConfiguration & config,
const std::vector<std::string> & listen_hosts,

View File

@ -589,7 +589,7 @@
stats.innerText = `Elapsed: ${seconds} sec, read ${formatted_rows} rows, ${formatted_bytes}.`;
/// We can also render graphs if user performed EXPLAIN PIPELINE graph=1 or EXPLAIN AST graph = 1
if (response.data.length > 3 && response.data[0][0].startsWith("digraph") && document.getElementById('query').value.match(/^\s*EXPLAIN/i)) {
if (response.data.length > 3 && document.getElementById('query').value.match(/^\s*EXPLAIN/i) && typeof(response.data[0][0]) === "string" && response.data[0][0].startsWith("digraph")) {
renderGraph(response);
} else {
renderTable(response);

99
release
View File

@ -1,99 +0,0 @@
#!/usr/bin/env bash

# Packaging entry point: optionally bumps the version, then builds .deb packages with debuild
# and, on request, rpm / tgz packages. The helper functions used here (get_revision_author,
# gen_revision_author, gen_changelog, make_rpm, make_tgz) come from utils/release/release_lib.sh.
#
# If you have "no space left" error, you can change the location of temporary files with BUILDPLACE environment variable.

# Version increment:
# Default release: 18.1.2 -> 18.2.0:
# ./release --version
# or
# ./release --version minor
# Bugfix release (only with small patches to previous release): 18.1.2 -> 18.1.3:
# ./release --version patch
# Do this once per year: 18.1.2 -> 19.0.0:
# ./release --version major
#
# Other options: --test, --check-build-dependencies, --rpm, --tgz.
# Environment read below: VERSION_STRING, SANITIZER, BUILD_TYPE, MALLOC_OPTS, CMAKE_FLAGS,
# EXTRAPACKAGES, DEB_CC, DEB_CXX, DEB_ARCH_FLAG, NO_BUILD, AUTHOR.

set -e

# Avoid dependency on locale
# NOTE(review): this is a plain assignment, not an export; child processes only see it
# when LC_ALL was already exported by the caller - confirm that this is intended.
LC_ALL=C

# Work from the directory that contains this script, so the relative path below resolves.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Quoted: a checkout path containing spaces or glob characters must stay a single argument.
cd "$CUR_DIR"

source "./utils/release/release_lib.sh"

DEBUILD_NOSIGN_OPTIONS="-us -uc"
DEBUILD_NODEPS_OPTIONS="-d"

# NOTE(review): presumably fills in VERSION_STRING / AUTHOR when the caller did not
# provide a version - verify against release_lib.sh.
if [ -z "$VERSION_STRING" ] ; then
    get_revision_author
fi

# Consume leading "--option" arguments; anything unknown aborts with exit code 2.
while [[ $1 == --* ]]
do
    if [[ $1 == '--test' ]]; then
        TEST='yes'
        VERSION_POSTFIX+=+test
        shift
    elif [[ $1 == '--check-build-dependencies' ]]; then
        # Drop "-d" so that debuild is no longer asked to skip the build-dependency check.
        DEBUILD_NODEPS_OPTIONS=""
        shift
    elif [[ $1 == '--version' ]]; then
        # $2 is intentionally unquoted: a bare "--version" must pass no argument at all,
        # which is the "default release" case described in the header.
        gen_revision_author $2
        exit 0
    elif [[ $1 == '--rpm' ]]; then
        MAKE_RPM=1
        shift
    elif [[ $1 == '--tgz' ]]; then
        MAKE_TGZ=1
        shift
    else
        echo "Unknown option $1"
        exit 2
    fi
done

# Build options
# A sanitizer build takes precedence: BUILD_TYPE is only consulted when SANITIZER is empty.
if [ -n "$SANITIZER" ]
then
    if [[ "$SANITIZER" == "address" ]]; then VERSION_POSTFIX+="+asan"
    elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan"
    elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan"
    elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan"
    else
        echo "Unknown value of SANITIZER variable: $SANITIZER"
        exit 3
    fi

    # Default to clang-10 only when the caller has not set a compiler.
    export DEB_CC=${DEB_CC=clang-10}
    export DEB_CXX=${DEB_CXX=clang++-10}
    EXTRAPACKAGES="$EXTRAPACKAGES clang-10 lld-10"
elif [[ $BUILD_TYPE == 'debug' ]]; then
    CMAKE_BUILD_TYPE=Debug
    VERSION_POSTFIX+="+debug"
fi

# Flags computed here are prepended, so flags supplied by the caller come last.
CMAKE_FLAGS=" $MALLOC_OPTS -DSANITIZE=$SANITIZER -DENABLE_CHECK_HEAVY_BUILDS=1 $CMAKE_FLAGS"
[[ -n "$CMAKE_BUILD_TYPE" ]] && CMAKE_FLAGS=" -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE $CMAKE_FLAGS"

export CMAKE_FLAGS
export EXTRAPACKAGES

VERSION_STRING+=$VERSION_POSTFIX
echo -e "\nCurrent version is $VERSION_STRING"

if [ -z "$NO_BUILD" ] ; then
    gen_changelog "$VERSION_STRING" "" "$AUTHOR" ""

    # Build (only binary packages).
    # The DEBUILD_* / DEB_ARCH_FLAG expansions stay unquoted on purpose: they hold several
    # options or may be empty. The compiler values are quoted so that a value containing
    # spaces is passed as one argument.
    debuild --preserve-env -e PATH \
        -e DEB_CC="$DEB_CC" -e DEB_CXX="$DEB_CXX" -e CMAKE_FLAGS="$CMAKE_FLAGS" \
        -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS} ${DEB_ARCH_FLAG}
fi

if [ -n "$MAKE_RPM" ]; then
    make_rpm
fi

if [ -n "$MAKE_TGZ" ]; then
    make_tgz
fi

View File

@ -327,7 +327,8 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type)
/// Create the 'need_rebuild_lists.mark' file.
/// This file will be used later to find out if writing lists is successful or not.
std::ofstream{getNeedRebuildListsMarkFilePath(directory_path)};
std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)};
out.close();
lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this};
lists_writing_thread_is_waiting = true;

View File

@ -39,7 +39,7 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
}
template <class First, class ... TArgs>
static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
{
const WhichDataType which(second_type);
@ -51,7 +51,7 @@ static IAggregateFunction * create(const IDataType & second_type, TArgs && ... a
// Not using helper functions because there are no templates for binary decimal/numeric function.
template <class... TArgs>
static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
{
const WhichDataType which(first_type);

View File

@ -30,7 +30,7 @@ AggregateFunctionPtr createAggregateFunctionDeltaSum(
throw Exception("Incorrect number of arguments for aggregate function " + name,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
DataTypePtr data_type = arguments[0];
const DataTypePtr & data_type = arguments[0];
if (isInteger(data_type) || isFloat(data_type))
return AggregateFunctionPtr(createWithNumericType<AggregationFunctionDeltaSum>(

View File

@ -20,7 +20,7 @@ namespace
{
template <template <typename, typename> class AggregateFunctionTemplate, typename Data, typename ... TArgs>
static IAggregateFunction * createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
IAggregateFunction * createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::Date) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);

View File

@ -19,7 +19,7 @@ namespace ErrorCodes
namespace
{
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> typename Data, typename... TArgs>
static IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs &&... args)
IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs &&... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::UInt8) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>(std::forward<TArgs>(args)...);

View File

@ -40,7 +40,7 @@ public:
};
template <typename HasLimit, typename ... TArgs>
static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, TArgs && ... args)
IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, TArgs && ... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::Date) return new AggregateFunctionGroupUniqArrayDate<HasLimit>(argument_type, std::forward<TArgs>(args)...);

View File

@ -56,7 +56,7 @@ private:
/// The name of the nested function, including combinators (i.e. *If)
///
/// getName() from the nested_function cannot be used because in case of *If combinator
/// with Nullable argument nested_function will point to the function w/o combinator.
/// with Nullable argument nested_function will point to the function without combinator.
/// (I.e. sumIf(Nullable, 1) -> sum()), and distributed query processing will fail.
///
/// And nested_function cannot point to the function with *If since

View File

@ -60,7 +60,7 @@ template <typename Value, bool float_return> using FuncQuantileBFloat16Weighted
template <typename Value, bool float_return> using FuncQuantilesBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, true>;
template <template <typename, bool> class Function>
static constexpr bool supportDecimal()
constexpr bool supportDecimal()
{
return std::is_same_v<Function<Float32, false>, FuncQuantile<Float32, false>> ||
std::is_same_v<Function<Float32, false>, FuncQuantiles<Float32, false>> ||
@ -75,7 +75,7 @@ static constexpr bool supportDecimal()
}
template <template <typename, bool> class Function>
static constexpr bool supportBigInt()
constexpr bool supportBigInt()
{
return std::is_same_v<Function<Float32, false>, FuncQuantile<Float32, false>> ||
std::is_same_v<Function<Float32, false>, FuncQuantiles<Float32, false>> ||

View File

@ -19,7 +19,7 @@ namespace
{
template <template <typename, typename> class AggregateFunctionTemplate, typename Data, typename ... TArgs>
static IAggregateFunction * createWithUIntegerOrTimeType(const std::string & name, const IDataType & argument_type, TArgs && ... args)
IAggregateFunction * createWithUIntegerOrTimeType(const std::string & name, const IDataType & argument_type, TArgs && ... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::Date || which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);

View File

@ -24,7 +24,7 @@ AggregateFunctionPtr createAggregateFunctionStatisticsUnary(
assertUnary(name, argument_types);
AggregateFunctionPtr res;
DataTypePtr data_type = argument_types[0];
const DataTypePtr & data_type = argument_types[0];
if (isDecimal(data_type))
res.reset(createWithDecimalType<FunctionTemplate>(*data_type, *data_type, argument_types));
else

View File

@ -56,7 +56,7 @@ AggregateFunctionPtr createAggregateFunctionSum(const std::string & name, const
assertUnary(name, argument_types);
AggregateFunctionPtr res;
DataTypePtr data_type = argument_types[0];
const DataTypePtr & data_type = argument_types[0];
if (isDecimal(data_type))
res.reset(createWithDecimalType<Function>(*data_type, *data_type, argument_types));
else

View File

@ -29,7 +29,7 @@ createAggregateFunctionSumCount(const std::string & name, const DataTypes & argu
assertUnary(name, argument_types);
AggregateFunctionPtr res;
DataTypePtr data_type = argument_types[0];
const DataTypePtr & data_type = argument_types[0];
if (!allowType(data_type))
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -44,7 +44,7 @@ class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateT
template <bool is_weighted>
static IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 load_factor, const Array & params)
IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 load_factor, const Array & params)
{
if (argument_types.empty())
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Got empty arguments list");

View File

@ -59,7 +59,7 @@
/** This hash function is not the most optimal, but UniquesHashSet states counted with it,
* stored in many places on disks (in the Yandex.Metrika), so it continues to be used.
* stored in many places on disks (in many companies), so it continues to be used.
*/
struct UniquesHashSetDefaultHash
{

View File

@ -954,6 +954,9 @@ void ClientBase::onProfileEvents(Block & block)
auto elapsed_time = profile_events.watch.elapsedMicroseconds();
progress_indication.updateThreadEventData(thread_times, elapsed_time);
if (need_render_progress)
progress_indication.writeProgress();
if (profile_events.print)
{
if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
@ -1055,7 +1058,13 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
/// Process the query that requires transferring data blocks to the server.
const auto parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && std_in.eof())))
throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT);
{
const auto & settings = global_context->getSettingsRef();
if (settings.throw_if_no_data_to_insert)
throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT);
else
return;
}
connection->sendQuery(
connection_parameters.timeouts,
@ -1646,7 +1655,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
catch (...)
{
// Surprisingly, this is a client error. A server error would
// have been reported w/o throwing (see onReceiveSeverException()).
// have been reported without throwing (see onReceiveSeverException()).
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
have_error = true;
}
@ -1689,7 +1698,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
if (!test_hint.clientError() && !test_hint.serverError())
{
// No error was expected but it still occurred. This is the
// default case w/o test hint, doesn't need additional
// default case without test hint, doesn't need additional
// diagnostics.
error_matches_hint = false;
}

View File

@ -45,64 +45,64 @@ private:
/// This is internal method to use from COW.
/// It performs shallow copy with copy-ctor and not useful from outside.
/// If you want to copy column for modification, look at 'mutate' method.
virtual MutablePtr clone() const = 0;
[[nodiscard]] virtual MutablePtr clone() const = 0;
public:
/// Name of a Column. It is used in info messages.
virtual std::string getName() const { return getFamilyName(); }
[[nodiscard]] virtual std::string getName() const { return getFamilyName(); }
/// Name of a Column kind, without parameters (example: FixedString, Array).
virtual const char * getFamilyName() const = 0;
[[nodiscard]] virtual const char * getFamilyName() const = 0;
/// Type of data that column contains. It's an underlying type: UInt16 for Date, UInt32 for DateTime, so on.
virtual TypeIndex getDataType() const = 0;
[[nodiscard]] virtual TypeIndex getDataType() const = 0;
/** If column isn't constant, returns itself.
* If column is constant, transforms constant to full column (if column type allows such transform) and return it.
*/
virtual Ptr convertToFullColumnIfConst() const { return getPtr(); }
[[nodiscard]] virtual Ptr convertToFullColumnIfConst() const { return getPtr(); }
/// If column isn't ColumnLowCardinality, return itself.
/// If column is ColumnLowCardinality, transforms it to full column.
virtual Ptr convertToFullColumnIfLowCardinality() const { return getPtr(); }
[[nodiscard]] virtual Ptr convertToFullColumnIfLowCardinality() const { return getPtr(); }
/// If column isn't ColumnSparse, return itself.
/// If column is ColumnSparse, transforms it to full column.
virtual Ptr convertToFullColumnIfSparse() const { return getPtr(); }
[[nodiscard]] virtual Ptr convertToFullColumnIfSparse() const { return getPtr(); }
Ptr convertToFullIfNeeded() const
[[nodiscard]] Ptr convertToFullIfNeeded() const
{
return convertToFullColumnIfSparse()->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality();
}
/// Creates empty column with the same type.
virtual MutablePtr cloneEmpty() const { return cloneResized(0); }
[[nodiscard]] virtual MutablePtr cloneEmpty() const { return cloneResized(0); }
/// Creates column with the same type and specified size.
/// If size is less than the current size, then data is cut.
/// If size is greater, then default values are appended.
virtual MutablePtr cloneResized(size_t /*size*/) const { throw Exception("Cannot cloneResized() column " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
[[nodiscard]] virtual MutablePtr cloneResized(size_t /*size*/) const { throw Exception("Cannot cloneResized() column " + getName(), ErrorCodes::NOT_IMPLEMENTED); }
/// Returns number of values in column.
virtual size_t size() const = 0;
[[nodiscard]] virtual size_t size() const = 0;
/// There are no values in columns.
bool empty() const { return size() == 0; }
[[nodiscard]] bool empty() const { return size() == 0; }
/// Returns value of n-th element in universal Field representation.
/// Is used in rare cases, since creation of Field instance is expensive usually.
virtual Field operator[](size_t n) const = 0;
[[nodiscard]] virtual Field operator[](size_t n) const = 0;
/// Like the previous one, but avoids extra copying if Field is in a container, for example.
virtual void get(size_t n, Field & res) const = 0;
/// If possible, returns pointer to memory chunk which contains n-th element (if it isn't possible, throws an exception)
/// Is used to optimize some computations (in aggregation, for example).
virtual StringRef getDataAt(size_t n) const = 0;
[[nodiscard]] virtual StringRef getDataAt(size_t n) const = 0;
/// Like getDataAt, but has special behavior for columns that contain variable-length strings.
/// Returns zero-ending memory chunk (i.e. its size is 1 byte longer).
virtual StringRef getDataAtWithTerminatingZero(size_t n) const
[[nodiscard]] virtual StringRef getDataAtWithTerminatingZero(size_t n) const
{
return getDataAt(n);
}
@ -110,19 +110,19 @@ public:
/// If column stores integers, it returns n-th element transformed to UInt64 using static_cast.
/// If column stores floating point numbers, bits of n-th elements are copied to lower bits of UInt64, the remaining bits are zeros.
/// Is used to optimize some computations (in aggregation, for example).
virtual UInt64 get64(size_t /*n*/) const
[[nodiscard]] virtual UInt64 get64(size_t /*n*/) const
{
throw Exception("Method get64 is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// If column stores native numeric type, it returns n-th element casted to Float64
/// Is used in regression methods to cast each features into uniform type
virtual Float64 getFloat64(size_t /*n*/) const
[[nodiscard]] virtual Float64 getFloat64(size_t /*n*/) const
{
throw Exception("Method getFloat64 is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
virtual Float32 getFloat32(size_t /*n*/) const
[[nodiscard]] virtual Float32 getFloat32(size_t /*n*/) const
{
throw Exception("Method getFloat32 is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
@ -131,31 +131,31 @@ public:
* For NULL values of Nullable column it is allowed to return arbitrary value.
* Otherwise throw an exception.
*/
virtual UInt64 getUInt(size_t /*n*/) const
[[nodiscard]] virtual UInt64 getUInt(size_t /*n*/) const
{
throw Exception("Method getUInt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
virtual Int64 getInt(size_t /*n*/) const
[[nodiscard]] virtual Int64 getInt(size_t /*n*/) const
{
throw Exception("Method getInt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
virtual bool isDefaultAt(size_t n) const = 0;
virtual bool isNullAt(size_t /*n*/) const { return false; }
[[nodiscard]] virtual bool isDefaultAt(size_t n) const = 0;
[[nodiscard]] virtual bool isNullAt(size_t /*n*/) const { return false; }
/** If column is numeric, return value of n-th element, casted to bool.
* For NULL values of Nullable column returns false.
* Otherwise throw an exception.
*/
virtual bool getBool(size_t /*n*/) const
[[nodiscard]] virtual bool getBool(size_t /*n*/) const
{
throw Exception("Method getBool is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// Removes all elements outside of specified range.
/// Is used in LIMIT operation, for example.
virtual Ptr cut(size_t start, size_t length) const
[[nodiscard]] virtual Ptr cut(size_t start, size_t length) const
{
MutablePtr res = cloneEmpty();
res->insertRangeFrom(*this, start, length);
@ -249,7 +249,7 @@ public:
* otherwise (i.e. < 0), makes reserve() using size of source column.
*/
using Filter = PaddedPODArray<UInt8>;
virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0;
[[nodiscard]] virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0;
/** Expand column by mask inplace. After expanding column will
* satisfy the following: if we filter it by given mask, we will
@ -262,11 +262,11 @@ public:
/// Permutes elements using specified permutation. Is used in sorting.
/// limit - if it isn't 0, puts only first limit elements in the result.
using Permutation = PaddedPODArray<size_t>;
virtual Ptr permute(const Permutation & perm, size_t limit) const = 0;
[[nodiscard]] virtual Ptr permute(const Permutation & perm, size_t limit) const = 0;
/// Creates new column with values column[indexes[:limit]]. If limit is 0, all indexes are used.
/// Indexes must be one of the ColumnUInt. For default implementation, see selectIndexImpl from ColumnsCommon.h
virtual Ptr index(const IColumn & indexes, size_t limit) const = 0;
[[nodiscard]] virtual Ptr index(const IColumn & indexes, size_t limit) const = 0;
/** Compares (*this)[n] and rhs[m]. Column rhs should have the same type.
* Returns negative number, 0, or positive number (*this)[n] is less, equal, greater than rhs[m] respectively.
@ -279,10 +279,10 @@ public:
*
* For non Nullable and non floating point types, nan_direction_hint is ignored.
*/
virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
[[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
/// Equivalent to compareAt, but collator is used to compare values.
virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const Collator &) const
[[nodiscard]] virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const Collator &) const
{
throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing it.", ErrorCodes::BAD_COLLATION);
}
@ -297,7 +297,7 @@ public:
int direction, int nan_direction_hint) const = 0;
/// Check if all elements in the column have equal values. Return true if column is empty.
virtual bool hasEqualValues() const = 0;
[[nodiscard]] virtual bool hasEqualValues() const = 0;
enum class PermutationSortDirection : uint8_t
{
@ -353,7 +353,7 @@ public:
*/
using Offset = UInt64;
using Offsets = PaddedPODArray<Offset>;
virtual Ptr replicate(const Offsets & offsets) const = 0;
[[nodiscard]] virtual Ptr replicate(const Offsets & offsets) const = 0;
/** Split column to smaller columns. Each value goes to column index, selected by corresponding element of 'selector'.
* Selector must contain values from 0 to num_columns - 1.
@ -361,7 +361,7 @@ public:
*/
using ColumnIndex = UInt64;
using Selector = PaddedPODArray<ColumnIndex>;
virtual std::vector<MutablePtr> scatter(ColumnIndex num_columns, const Selector & selector) const = 0;
[[nodiscard]] virtual std::vector<MutablePtr> scatter(ColumnIndex num_columns, const Selector & selector) const = 0;
/// Insert data from several other columns according to source mask (used in vertical merge).
/// For now it is a helper to de-virtualize calls to insert*() functions inside gather loop
@ -385,15 +385,15 @@ public:
virtual void ensureOwnership() {}
/// Size of column data in memory (may be approximate) - for profiling. Zero, if could not be determined.
virtual size_t byteSize() const = 0;
[[nodiscard]] virtual size_t byteSize() const = 0;
/// Size of single value in memory (for accounting purposes)
virtual size_t byteSizeAt(size_t /*n*/) const = 0;
[[nodiscard]] virtual size_t byteSizeAt(size_t /*n*/) const = 0;
/// Size of memory, allocated for column.
/// This is greater or equals to byteSize due to memory reservation in containers.
/// Zero, if could not be determined.
virtual size_t allocatedBytes() const = 0;
[[nodiscard]] virtual size_t allocatedBytes() const = 0;
/// Make memory region readonly with mprotect if it is large enough.
/// The operation is slow and performed only for debug builds.
@ -406,14 +406,14 @@ public:
/// Columns have equal structure.
/// If true - you can use "compareAt", "insertFrom", etc. methods.
virtual bool structureEquals(const IColumn &) const
[[nodiscard]] virtual bool structureEquals(const IColumn &) const
{
throw Exception("Method structureEquals is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// Returns the ratio of values in the column that are equal to the default value of the column.
/// Checks only @sample_ratio ratio of rows.
virtual double getRatioOfDefaultRows(double sample_ratio = 1.0) const = 0; /// NOLINT
[[nodiscard]] virtual double getRatioOfDefaultRows(double sample_ratio = 1.0) const = 0; /// NOLINT
/// Returns indices of values in the column that are not equal to the default value of the column.
virtual void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const = 0;
@ -423,13 +423,13 @@ public:
/// Other values are filled by @default_value.
/// @shift means how much rows to skip from the beginning of current column.
/// Used to create full column from sparse.
virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const;
[[nodiscard]] virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const;
virtual SerializationInfoPtr getSerializationInfo() const;
[[nodiscard]] virtual SerializationInfoPtr getSerializationInfo() const;
/// Compress column in memory to some representation that allows to decompress it back.
/// Return itself if compression is not applicable for this column type.
virtual Ptr compress() const
[[nodiscard]] virtual Ptr compress() const
{
/// No compression by default.
return getPtr();
@ -437,13 +437,13 @@ public:
/// If it's CompressedColumn, decompress it and return.
/// Otherwise return itself.
virtual Ptr decompress() const
[[nodiscard]] virtual Ptr decompress() const
{
return getPtr();
}
static MutablePtr mutate(Ptr ptr)
[[nodiscard]] static MutablePtr mutate(Ptr ptr)
{
MutablePtr res = ptr->shallowMutate(); /// Now use_count is 2.
ptr.reset(); /// Reset use_count to 1.
@ -463,10 +463,10 @@ public:
/// Various properties on behaviour of column type.
/// True if column contains something nullable inside. It's true for ColumnNullable, can be true or false for ColumnConst, etc.
virtual bool isNullable() const { return false; }
[[nodiscard]] virtual bool isNullable() const { return false; }
/// It's a special kind of column, that contain single value, but is not a ColumnConst.
virtual bool isDummy() const { return false; }
[[nodiscard]] virtual bool isDummy() const { return false; }
/** Memory layout properties.
*
@ -486,32 +486,32 @@ public:
*/
/// Values in column have fixed size (including the case when values span many memory segments).
virtual bool valuesHaveFixedSize() const { return isFixedAndContiguous(); }
[[nodiscard]] virtual bool valuesHaveFixedSize() const { return isFixedAndContiguous(); }
/// Values in column are represented as continuous memory segment of fixed size. Implies valuesHaveFixedSize.
virtual bool isFixedAndContiguous() const { return false; }
[[nodiscard]] virtual bool isFixedAndContiguous() const { return false; }
/// If isFixedAndContiguous, returns the underlying data array, otherwise throws an exception.
virtual StringRef getRawData() const { throw Exception("Column " + getName() + " is not a contiguous block of memory", ErrorCodes::NOT_IMPLEMENTED); }
[[nodiscard]] virtual StringRef getRawData() const { throw Exception("Column " + getName() + " is not a contiguous block of memory", ErrorCodes::NOT_IMPLEMENTED); }
/// If valuesHaveFixedSize, returns size of value, otherwise throw an exception.
virtual size_t sizeOfValueIfFixed() const { throw Exception("Values of column " + getName() + " are not fixed size.", ErrorCodes::CANNOT_GET_SIZE_OF_FIELD); }
[[nodiscard]] virtual size_t sizeOfValueIfFixed() const { throw Exception("Values of column " + getName() + " are not fixed size.", ErrorCodes::CANNOT_GET_SIZE_OF_FIELD); }
/// Column is ColumnVector of numbers or ColumnConst of it. Note that Nullable columns are not numeric.
virtual bool isNumeric() const { return false; }
[[nodiscard]] virtual bool isNumeric() const { return false; }
/// If the only value column can contain is NULL.
/// Does not imply type of object, because it can be ColumnNullable(ColumnNothing) or ColumnConst(ColumnNullable(ColumnNothing))
virtual bool onlyNull() const { return false; }
[[nodiscard]] virtual bool onlyNull() const { return false; }
/// Can be inside ColumnNullable.
virtual bool canBeInsideNullable() const { return false; }
[[nodiscard]] virtual bool canBeInsideNullable() const { return false; }
virtual bool lowCardinality() const { return false; }
[[nodiscard]] virtual bool lowCardinality() const { return false; }
virtual bool isSparse() const { return false; }
[[nodiscard]] virtual bool isSparse() const { return false; }
virtual bool isCollationSupported() const { return false; }
[[nodiscard]] virtual bool isCollationSupported() const { return false; }
virtual ~IColumn() = default;
IColumn() = default;
@ -519,7 +519,7 @@ public:
/** Print column name, size, and recursively print all subcolumns.
*/
String dumpStructure() const;
[[nodiscard]] String dumpStructure() const;
protected:
/// Template is to devirtualize calls to insertFrom method.

14
src/Common/Concepts.h Normal file
View File

@ -0,0 +1,14 @@
#pragma once
#include <concepts>
namespace DB
{

/// Constrains a parameter pack to hold at most one type: the nested requirement
/// is met when the pack is empty or contains exactly one element.
template <typename... T>
concept OptionalArgument = requires(T &&...)
{
    requires(sizeof...(T) <= 1);
};

}

View File

@ -55,7 +55,7 @@ public:
/// 2) Determine the includes file from the config: <include_from>/path2/metrika.xml</include_from>
/// If this path is not configured, use /etc/metrika.xml
/// 3) Replace elements matching the "<foo incl="bar"/>" pattern with
/// "<foo>contents of the yandex/bar element in metrika.xml</foo>"
/// "<foo>contents of the clickhouse/bar element in metrika.xml</foo>"
/// 4) If zk_node_cache is non-NULL, replace elements matching the "<foo from_zk="/bar">" pattern with
/// "<foo>contents of the /bar ZooKeeper node</foo>".
/// If has_zk_includes is non-NULL and there are such elements, set has_zk_includes to true.
@ -137,4 +137,3 @@ private:
};
}

View File

@ -89,7 +89,7 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
/// NOTE:
/// - Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2
/// - Poco::Net::IPAddress::tryParse() expect hex string for IPv6 (w/o brackets)
/// - Poco::Net::IPAddress::tryParse() expect hex string for IPv6 (without brackets)
if (host.starts_with('['))
{
assert(host.ends_with(']'));

View File

@ -233,6 +233,88 @@ FileSegments LRUFileCache::splitRangeIntoCells(
return file_segments;
}
/// Inserts file segments in state EMPTY into every gap that `file_segments` leaves inside `range`.
///
/// file_segments - non-empty list of segments that (maybe partially) intersect `range`;
///                 new segments are inserted in place, keeping the order.
/// key           - passed to every created segment.
/// range         - requested range; `range.right` is treated as inclusive.
/// fill_with_detached_file_segments - if true, each hole becomes a single FileSegment that is
///                 constructed directly and marked `detached`; otherwise the hole is filled with
///                 whatever splitRangeIntoCells() returns for it.
/// cache_lock    - forwarded to splitRangeIntoCells().
void LRUFileCache::fillHolesWithEmptyFileSegments(
    FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock)
{
    /// There are segments [segment1, ..., segmentN]
    /// (non-overlapping, non-empty, ascending-ordered) which (maybe partially)
    /// intersect with given range.

    /// It can have holes:
    /// [____________________]         -- requested range
    ///     [____]  [_]   [_________]  -- intersecting cache [segment1, ..., segmentN]
    ///
    /// For each such hole create a cell with file segment state EMPTY.

    /// The first segment is dereferenced unconditionally below, so an empty list is a caller bug.
    assert(!file_segments.empty());

    /// Inserts EMPTY segment(s) covering `hole_size` bytes starting at `hole_offset` right before `where`.
    auto fill_hole = [&](auto where, size_t hole_offset, auto hole_size)
    {
        if (fill_with_detached_file_segments)
        {
            auto file_segment = std::make_shared<FileSegment>(hole_offset, hole_size, key, this, FileSegment::State::EMPTY);
            file_segment->detached = true;
            file_segments.insert(where, file_segment);
        }
        else
        {
            file_segments.splice(where, splitRangeIntoCells(key, hole_offset, hole_size, FileSegment::State::EMPTY, cache_lock));
        }
    };

    auto it = file_segments.begin();
    auto segment_range = (*it)->range();

    /// First position of the requested range that is not yet known to be covered.
    size_t current_pos;
    if (segment_range.left < range.left)
    {
        ///    [_______         -- requested range
        /// [_______
        /// ^
        /// segment1

        current_pos = segment_range.right + 1;
        ++it;
    }
    else
        current_pos = range.left;

    while (current_pos <= range.right && it != file_segments.end())
    {
        segment_range = (*it)->range();

        /// The segment starts exactly where coverage ended: no hole in front of it.
        if (current_pos == segment_range.left)
        {
            current_pos = segment_range.right + 1;
            ++it;
            continue;
        }

        assert(current_pos < segment_range.left);

        /// Inserting before `it` leaves `it` pointing at the same existing segment.
        fill_hole(it, current_pos, segment_range.left - current_pos);

        current_pos = segment_range.right + 1;
        ++it;
    }

    if (current_pos <= range.right)
    {
        ///   ________]     -- requested range
        ///   _____]
        ///        ^
        /// segmentN

        fill_hole(file_segments.end(), current_pos, range.right - current_pos + 1);
    }
}
FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t size)
{
assertInitialized();
@ -254,69 +336,42 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t
}
else
{
/// There are segments [segment1, ..., segmentN]
/// (non-overlapping, non-empty, ascending-ordered) which (maybe partially)
/// intersect with given range.
/// It can have holes:
/// [____________________] -- requested range
/// [____] [_] [_________] -- intersecting cache [segment1, ..., segmentN]
///
/// For each such hole create a cell with file segment state EMPTY.
auto it = file_segments.begin();
auto segment_range = (*it)->range();
size_t current_pos;
if (segment_range.left < range.left)
{
/// [_______ -- requested range
/// [_______
/// ^
/// segment1
current_pos = segment_range.right + 1;
++it;
}
else
current_pos = range.left;
while (current_pos <= range.right && it != file_segments.end())
{
segment_range = (*it)->range();
if (current_pos == segment_range.left)
{
current_pos = segment_range.right + 1;
++it;
continue;
}
assert(current_pos < segment_range.left);
auto hole_size = segment_range.left - current_pos;
file_segments.splice(it, splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
current_pos = segment_range.right + 1;
++it;
}
if (current_pos <= range.right)
{
/// ________] -- requested range
/// _____]
/// ^
/// segmentN
auto hole_size = range.right - current_pos + 1;
file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
}
fillHolesWithEmptyFileSegments(file_segments, key, range, false, cache_lock);
}
assert(!file_segments.empty());
return FileSegmentsHolder(std::move(file_segments));
}
/// Returns file segments covering [offset, offset + size - 1] without holes.
/// Parts of the range that have no cached segment are represented by EMPTY file
/// segments marked as detached (see fillHolesWithEmptyFileSegments() called with
/// fill_with_detached_file_segments = true).
FileSegmentsHolder LRUFileCache::get(const Key & key, size_t offset, size_t size)
{
    assertInitialized();

    FileSegment::Range range(offset, offset + size - 1);

    std::lock_guard cache_lock(mutex);

#ifndef NDEBUG
    assertCacheCorrectness(key, cache_lock);
#endif

    /// Collect every segment which intersects with the requested range.
    auto file_segments = getImpl(key, range, cache_lock);

    if (!file_segments.empty())
    {
        /// Something intersects the range: plug the gaps with detached segments.
        fillHolesWithEmptyFileSegments(file_segments, key, range, true, cache_lock);
        return FileSegmentsHolder(std::move(file_segments));
    }

    /// Nothing intersects the range: it is represented by a single detached EMPTY segment.
    auto whole_range_segment = std::make_shared<FileSegment>(offset, size, key, this, FileSegment::State::EMPTY);
    whole_range_segment->detached = true;
    file_segments = { whole_range_segment };

    return FileSegmentsHolder(std::move(file_segments));
}
LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
const Key & key, size_t offset, size_t size, FileSegment::State state,
std::lock_guard<std::mutex> & cache_lock)

View File

@ -72,6 +72,17 @@ public:
*/
virtual FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) = 0;
/**
* Segments in returned list are ordered in ascending order and represent a full contiguous
* interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY.
*
* If file segment has state EMPTY, then it is also marked as "detached". I.e. it is "detached"
* from cache (not owned by cache), and as a result will never change its state and will be destructed
* with the destruction of the holder, while in getOrSet() EMPTY file segments can eventually change
* their state (and become DOWNLOADED).
*/
virtual FileSegmentsHolder get(const Key & key, size_t offset, size_t size) = 0;
virtual FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) = 0;
virtual FileSegments getSnapshot() const = 0;
@ -124,6 +135,8 @@ public:
FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override;
FileSegmentsHolder get(const Key & key, size_t offset, size_t size) override;
FileSegments getSnapshot() const override;
FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override;
@ -213,6 +226,9 @@ private:
String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
void fillHolesWithEmptyFileSegments(
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock);
public:
struct Stat
{

View File

@ -107,6 +107,9 @@ String FileSegment::getOrSetDownloader()
{
std::lock_guard segment_lock(mutex);
if (detached)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot set downloader for a detached file segment");
if (downloader_id.empty())
{
assert(download_state != State::DOWNLOADING);
@ -160,6 +163,11 @@ bool FileSegment::isDownloader() const
return getCallerId() == downloader_id;
}
/// Same check as isDownloader(), for callers that pass the segment lock they already hold
/// (the lock parameter is unused in the body).
/// Returns true when the caller id equals the current downloader_id.
bool FileSegment::isDownloaderImpl(std::lock_guard<std::mutex> & /* segment_lock */) const
{
return getCallerId() == downloader_id;
}
FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader()
{
if (!isDownloader())
@ -216,6 +224,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset_)
"Attempt to write {} bytes to offset: {}, but current download offset is {}",
size, offset_, download_offset);
assertNotDetached();
if (!cache_writer)
{
if (downloaded_size > 0)
@ -263,6 +273,8 @@ void FileSegment::writeInMemory(const char * from, size_t size)
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Not enough space is reserved. Available: {}, expected: {}", availableSize(), size);
assertNotDetached();
std::lock_guard segment_lock(mutex);
if (cache_writer)
@ -297,7 +309,9 @@ size_t FileSegment::finalizeWrite()
size_t size = cache_writer->offset();
if (size == 0)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing size is not allowed");
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed");
assertNotDetached();
try
{
@ -352,6 +366,8 @@ bool FileSegment::reserve(size_t size)
if (!size)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Zero space reservation is not allowed");
assertNotDetached();
{
std::lock_guard segment_lock(mutex);
@ -386,6 +402,9 @@ bool FileSegment::reserve(size_t size)
void FileSegment::setDownloaded(std::lock_guard<std::mutex> & /* segment_lock */)
{
if (is_downloaded)
return;
download_state = State::DOWNLOADED;
is_downloaded = true;
downloader_id.clear();
@ -415,11 +434,13 @@ void FileSegment::completeBatchAndResetDownloader()
{
std::lock_guard segment_lock(mutex);
bool is_downloader = downloader_id == getCallerId();
if (!is_downloader)
if (!isDownloaderImpl(segment_lock))
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "File segment can be completed only by downloader");
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"File segment can be completed only by downloader ({} != {})",
downloader_id, getCallerId());
}
resetDownloaderImpl(segment_lock);
@ -434,7 +455,7 @@ void FileSegment::complete(State state)
std::lock_guard cache_lock(cache->mutex);
std::lock_guard segment_lock(mutex);
bool is_downloader = downloader_id == getCallerId();
bool is_downloader = isDownloaderImpl(segment_lock);
if (!is_downloader)
{
cv.notify_all();
@ -451,15 +472,20 @@ void FileSegment::complete(State state)
"Cannot complete file segment with state: {}", stateToString(state));
}
if (state == State::DOWNLOADED)
setDownloaded(segment_lock);
download_state = state;
assertNotDetached();
try
{
completeImpl(cache_lock, segment_lock);
}
catch (...)
{
if (!downloader_id.empty() && downloader_id == getCallerIdImpl())
if (!downloader_id.empty() && is_downloader)
downloader_id.clear();
cv.notify_all();
@ -476,15 +502,21 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
if (download_state == State::SKIP_CACHE || detached)
return;
if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size())
if (isDownloaderImpl(segment_lock)
&& download_state != State::DOWNLOADED
&& getDownloadedSize(segment_lock) == range().size())
{
setDownloaded(segment_lock);
}
assertNotDetached();
if (download_state == State::DOWNLOADING || download_state == State::EMPTY)
{
/// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the
/// downloader or the only owner of the segment.
bool can_update_segment_state = downloader_id == getCallerIdImpl()
bool can_update_segment_state = isDownloaderImpl(segment_lock)
|| cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
if (can_update_segment_state)
@ -497,7 +529,7 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
}
catch (...)
{
if (!downloader_id.empty() && downloader_id == getCallerIdImpl())
if (!downloader_id.empty() && isDownloaderImpl(segment_lock))
downloader_id.clear();
cv.notify_all();
@ -543,7 +575,7 @@ void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lo
}
}
if (!downloader_id.empty() && (downloader_id == getCallerIdImpl() || is_last_holder))
if (!downloader_id.empty() && (isDownloaderImpl(segment_lock) || is_last_holder))
{
LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state));
downloader_id.clear();
@ -608,6 +640,12 @@ void FileSegment::assertCorrectnessImpl(std::lock_guard<std::mutex> & /* segment
assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0);
}
/// Guard for operations that are only valid on file segments owned by the cache:
/// throws an Exception with LOGICAL_ERROR if `detached` is set, otherwise does nothing.
/// NOTE(review): `detached` is read here without taking any lock in this function — confirm against callers.
void FileSegment::assertNotDetached() const
{
if (detached)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Operation not allowed, file segment is detached");
}
FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard<std::mutex> & /* cache_lock */)
{
auto snapshot = std::make_shared<FileSegment>(
@ -641,6 +679,15 @@ FileSegmentsHolder::~FileSegmentsHolder()
if (!cache)
cache = file_segment->cache;
if (file_segment->detached)
{
/// This file segment is not owned by cache, so it will be destructed
/// at this point, therefore no completion required.
assert(file_segment->state() == FileSegment::State::EMPTY);
file_segment_it = file_segments.erase(current_file_segment_it);
continue;
}
try
{
/// File segment pointer must be reset right after calling complete() and

View File

@ -150,9 +150,11 @@ private:
size_t getDownloadedSize(std::lock_guard<std::mutex> & segment_lock) const;
String getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock) const;
void assertCorrectnessImpl(std::lock_guard<std::mutex> & segment_lock) const;
void assertNotDetached() const;
void setDownloaded(std::lock_guard<std::mutex> & segment_lock);
void setDownloadFailed(std::lock_guard<std::mutex> & segment_lock);
bool isDownloaderImpl(std::lock_guard<std::mutex> & segment_lock) const;
void wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard<std::mutex> & segment_lock) const;
@ -199,6 +201,8 @@ private:
Poco::Logger * log;
/// "detached" file segment means that it is not owned by cache ("detached" from cache).
/// In general case, all file segments are owned by cache.
bool detached = false;
std::atomic<bool> is_downloaded{false};

View File

@ -13,7 +13,7 @@
*
* Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times.
* This is because of following reasons:
* - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
* - in Metrica web analytics system, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
* - in typical implementation of standard library, hash function for integers is trivial and just use lower bits;
* - traffic is non-uniformly distributed across a day;
* - we are using open-addressing linear probing hash tables that are most critical to hash function quality,

View File

@ -28,7 +28,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
* in which all metacharacters are escaped,
* and also if there are no '|' outside the brackets,
* and also avoid substrings of the form `http://` or `www` and some other
* (this is the hack for typical use case in Yandex.Metrica).
* (this is the hack for typical use case in web analytics applications).
*/
const char * begin = regexp.data();
const char * pos = begin;

View File

@ -31,7 +31,7 @@ namespace Util
/// context can't own, as Context is destroyed before logger,
/// and logger lives longer and logging can still happen after Context destruction.
/// resetting masker in the logger at the moment of
/// context destruction can't be done w/o synchronization / locks in a safe manner.
/// context destruction can't be done without synchronization / locks in a safe manner.
///
/// logger is Poco derived and I didn't want to break its interface,
/// also logger can be dynamically reconfigured without server restart,

View File

@ -1,4 +1,4 @@
# These files are located in separate library, because they are used by Yandex.Metrika code
# These files are located in separate library, because they are used by separate products
# in places when no dependency on whole "dbms" library is possible.
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")

View File

@ -1,4 +1,5 @@
#include <Common/ThreadPool.h>
#include <Common/setThreadName.h>
#include <Common/Exception.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
@ -243,6 +244,9 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
while (true)
{
/// This is inside the loop to also reset previous thread names set inside the jobs.
setThreadName("ThreadPool");
Job job;
bool need_shutdown = false;

View File

@ -100,7 +100,7 @@ enum ComputeWidthMode
};
template <ComputeWidthMode mode>
static size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
{
UTF8Decoder decoder;
size_t width = 0;

View File

@ -45,7 +45,7 @@ struct ZooKeeperResponse : virtual Response
using ZooKeeperResponsePtr = std::shared_ptr<ZooKeeperResponse>;
/// Exposed in header file for Yandex.Metrica code.
/// Exposed in header file for some external code.
struct ZooKeeperRequest : virtual Request
{
XID xid = 0;
@ -57,6 +57,8 @@ struct ZooKeeperRequest : virtual Request
bool restored_from_zookeeper_log = false;
UInt64 request_created_time_ns = 0;
UInt64 thread_id = 0;
String query_id;
ZooKeeperRequest() = default;
ZooKeeperRequest(const ZooKeeperRequest &) = default;

View File

@ -8,6 +8,7 @@
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <base/logger_useful.h>
#include <base/getThreadId.h>
#include <Common/config.h>
@ -1016,6 +1017,11 @@ void ZooKeeper::pushRequest(RequestInfo && info)
try
{
info.time = clock::now();
if (zk_log)
{
info.request->thread_id = getThreadId();
info.request->query_id = String(CurrentThread::getQueryId());
}
if (!info.request->xid)
{
@ -1269,6 +1275,11 @@ void ZooKeeper::logOperationIfNeeded(const ZooKeeperRequestPtr & request, const
elem.event_time = event_time;
elem.address = socket_address;
elem.session_id = session_id;
if (request)
{
elem.thread_id = request->thread_id;
elem.query_id = request->query_id;
}
maybe_zk_log->add(elem);
}
}

View File

@ -7,8 +7,5 @@ target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zoo
add_executable(zkutil_test_async zkutil_test_async.cpp)
target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log)
add_executable (zk_many_watches_reconnect zk_many_watches_reconnect.cpp)
target_link_libraries (zk_many_watches_reconnect PRIVATE clickhouse_common_zookeeper_no_log clickhouse_common_config)
add_executable (zookeeper_impl zookeeper_impl.cpp)
target_link_libraries (zookeeper_impl PRIVATE clickhouse_common_zookeeper_no_log)

View File

@ -1,66 +0,0 @@
#include <Common/Config/ConfigProcessor.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Poco/Event.h>
#include <iostream>
/// A tool for reproducing https://issues.apache.org/jira/browse/ZOOKEEPER-706
/// Original libzookeeper can't reconnect the session if the length of SET_WATCHES message
/// exceeds jute.maxbuffer (0xfffff by default).
/// This happens when the number of watches exceeds ~29000.
///
/// Session reconnect can be caused by forbidding packets to the current zookeeper server, e.g.
/// sudo ip6tables -A OUTPUT -d mtzoo01it.haze.yandex.net -j REJECT
const size_t N_THREADS = 100;
int main(int argc, char ** argv)
{
try
{
if (argc != 3)
{
std::cerr << "usage: " << argv[0] << " <zookeeper_config> <number_of_watches>" << std::endl;
return 3;
}
DB::ConfigProcessor processor(argv[1], false, true);
auto config = processor.loadConfig().configuration;
zkutil::ZooKeeper zk(*config, "zookeeper", nullptr);
zkutil::EventPtr watch = std::make_shared<Poco::Event>();
/// NOTE: setting watches in multiple threads because doing it in a single thread is too slow.
size_t watches_per_thread = std::stoull(argv[2]) / N_THREADS;
std::vector<std::thread> threads;
for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread)
{
threads.emplace_back([&, i_thread]
{
for (size_t i = 0; i < watches_per_thread; ++i)
zk.exists("/clickhouse/nonexistent_node" + std::to_string(i * N_THREADS + i_thread), nullptr, watch);
});
}
for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread)
threads[i_thread].join();
while (true)
{
std::cerr << "WAITING..." << std::endl;
sleep(10);
}
}
catch (Poco::Exception & e)
{
std::cerr << "Exception: " << e.displayText() << std::endl;
return 1;
}
catch (std::exception & e)
{
std::cerr << "std::exception: " << e.what() << std::endl;
return 3;
}
catch (...)
{
std::cerr << "Some exception" << std::endl;
return 2;
}
}

View File

@ -234,6 +234,11 @@ bool createFile(const std::string & path)
DB::throwFromErrnoWithPath("Cannot create file: " + path, path, DB::ErrorCodes::CANNOT_CREATE_FILE);
}
/// Tells whether `path` exists: true when faccessat() with F_OK succeeds for it.
/// Relative paths are resolved against the current working directory (AT_FDCWD),
/// and the check is made with the effective user and group ids (AT_EACCESS).
bool exists(const std::string & path)
{
    const int access_result = faccessat(AT_FDCWD, path.c_str(), F_OK, AT_EACCESS);
    return access_result == 0;
}
bool canRead(const std::string & path)
{
struct stat st;
@ -249,7 +254,6 @@ bool canRead(const std::string & path)
DB::throwFromErrnoWithPath("Cannot check read access to file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED);
}
bool canWrite(const std::string & path)
{
struct stat st;
@ -265,6 +269,13 @@ bool canWrite(const std::string & path)
DB::throwFromErrnoWithPath("Cannot check write access to file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED);
}
/// Tells whether the file at `path` may be executed: the result of faccessat() with X_OK,
/// checked with the effective user and group ids (AT_EACCESS).
/// When `path` does not exist, the error is reported through DB::throwFromErrnoWithPath()
/// with the PATH_ACCESS_DENIED code instead of returning false.
bool canExecute(const std::string & path)
{
    const bool path_exists = exists(path);
    if (path_exists)
    {
        const int access_result = faccessat(AT_FDCWD, path.c_str(), X_OK, AT_EACCESS);
        return access_result == 0;
    }
    DB::throwFromErrnoWithPath("Cannot check execute access to file: " + path, path, DB::ErrorCodes::PATH_ACCESS_DENIED);
}
time_t getModificationTime(const std::string & path)
{
struct stat st;

View File

@ -70,8 +70,10 @@ namespace FS
{
bool createFile(const std::string & path);
bool exists(const std::string & path);
bool canRead(const std::string & path);
bool canWrite(const std::string & path);
bool canExecute(const std::string & path);
time_t getModificationTime(const std::string & path);
Poco::Timestamp getModificationTimestamp(const std::string & path);

View File

@ -5,82 +5,63 @@
# include <cmath>
# include <fstream>
#endif
#if USE_CPUID
# include <libcpuid/libcpuid.h>
#endif
#include <thread>
#if defined(OS_LINUX)
unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count)
static int readFrom(const char * filename, int default_value)
{
// Try to look at cgroups limit if it is available.
auto read_from = [](const char * filename, int default_value) -> int {
std::ifstream infile(filename);
if (!infile.is_open())
{
return default_value;
}
int idata;
if (infile >> idata)
return idata;
else
return default_value;
};
std::ifstream infile(filename);
if (!infile.is_open())
return default_value;
int idata;
if (infile >> idata)
return idata;
else
return default_value;
}
/// Try to look at cgroups limit if it is available.
static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count)
{
unsigned quota_count = default_cpu_count;
// Return the number of milliseconds per period process is guaranteed to run.
// -1 for no quota
int cgroup_quota = read_from("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1);
int cgroup_period = read_from("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1);
/// Return the number of milliseconds per period process is guaranteed to run.
/// -1 for no quota
int cgroup_quota = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1);
int cgroup_period = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1);
if (cgroup_quota > -1 && cgroup_period > 0)
{
quota_count = ceil(static_cast<float>(cgroup_quota) / static_cast<float>(cgroup_period));
}
return std::min(default_cpu_count, quota_count);
}
#endif // OS_LINUX
#endif
/// Estimates the number of physical CPU cores available to the process.
/// Starts from the number of logical cores reported by the standard library, halves it on
/// x86_64 machines with at least 8 logical cores, and on Linux additionally applies the
/// limit computed by getCGroupLimitedCPUCores().
static unsigned getNumberOfPhysicalCPUCoresImpl()
{
    unsigned cores = std::thread::hardware_concurrency();

    /// Most of x86_64 CPUs have 2-way Hyper-Threading.
    /// Aarch64 and RISC-V don't have SMT so far.
    /// POWER has SMT and it can be multiple way (like 8-way), but we don't know
    /// how ClickHouse really behaves there, so all of the logical cores are used.

#if defined(__x86_64__)
    /// Limit ourselves to the number of physical cores, unless the number of logical cores
    /// is small: that may be a small machine or a very limited cloud instance,
    /// where it is reasonable to use all the cores.
    cores = (cores >= 8) ? cores / 2 : cores;
#endif

#if defined(OS_LINUX)
    cores = getCGroupLimitedCPUCores(cores);
#endif

    return cores;
}
unsigned getNumberOfPhysicalCPUCores()
{
static const unsigned number = [] {
unsigned cpu_count = 0; // start with an invalid num
#if USE_CPUID
do
{
cpu_raw_data_t raw_data;
cpu_id_t data;
/// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method.
/// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151.
if (0 != cpuid_get_raw_data(&raw_data) || 0 != cpu_identify(&raw_data, &data) || data.num_logical_cpus == 0)
{
// Just fallback
break;
}
cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus;
/// Also, libcpuid gives strange result on Google Compute Engine VMs.
/// Example:
/// num_cores = 12, /// number of physical cores on current CPU socket
/// total_logical_cpus = 1, /// total number of logical cores on all sockets
/// num_logical_cpus = 24. /// number of logical cores on current CPU socket
/// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1.
} while (false);
#endif
/// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system.
/// (Actually, only Aarch64 is supported).
if (cpu_count == 0)
cpu_count = std::thread::hardware_concurrency();
#if defined(OS_LINUX)
/// TODO: add a setting for disabling that, similar to UseContainerSupport in java
cpu_count = getCGroupLimitedCPUCores(cpu_count);
#endif // OS_LINUX
return cpu_count;
}();
return number;
/// Calculate once.
static auto res = getNumberOfPhysicalCPUCoresImpl();
return res;
}

View File

@ -1,8 +1,11 @@
#pragma once
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#include <new>
#include <base/defines.h>
#include <Common/Concepts.h>
#include <Common/CurrentMemoryTracker.h>
#include <Common/config.h>
@ -14,13 +17,24 @@
# include <cstdlib>
#endif
namespace Memory
{
inline ALWAYS_INLINE void * newImpl(std::size_t size)
inline ALWAYS_INLINE size_t alignToSizeT(std::align_val_t align) noexcept
{
auto * ptr = malloc(size);
return static_cast<size_t>(align);
}
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... align)
{
void * ptr = nullptr;
if constexpr (sizeof...(TAlign) == 1)
ptr = aligned_alloc(alignToSizeT(align...), size);
else
ptr = malloc(size);
if (likely(ptr != nullptr))
return ptr;
@ -33,6 +47,11 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
return malloc(size);
}
/// Aligned overload of the non-throwing allocation helper: forwards to aligned_alloc()
/// with the alignment converted to size_t and returns its result as is (no null check here).
inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept
{
return aligned_alloc(static_cast<size_t>(align), size);
}
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{
free(ptr);
@ -40,17 +59,24 @@ inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
#if USE_JEMALLOC
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size, TAlign... align) noexcept
{
if (unlikely(ptr == nullptr))
return;
sdallocx(ptr, size, 0);
if constexpr (sizeof...(TAlign) == 1)
sdallocx(ptr, size, MALLOCX_ALIGN(alignToSizeT(align...)));
else
sdallocx(ptr, size, 0);
}
#else
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]], TAlign... /* align */) noexcept
{
free(ptr);
}
@ -58,13 +84,14 @@ inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unuse
#endif
#if defined(OS_LINUX)
# include <malloc.h>
# include <malloc.h>
#elif defined(OS_DARWIN)
# include <malloc/malloc.h>
# include <malloc/malloc.h>
#endif
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size, TAlign... align [[maybe_unused]])
{
size_t actual_size = size;
@ -72,26 +99,41 @@ inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
if (likely(size != 0))
actual_size = nallocx(size, 0);
{
if constexpr (sizeof...(TAlign) == 1)
actual_size = nallocx(size, MALLOCX_ALIGN(alignToSizeT(align...)));
else
actual_size = nallocx(size, 0);
}
#endif
return actual_size;
}
inline ALWAYS_INLINE void trackMemory(std::size_t size)
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void trackMemory(std::size_t size, TAlign... align)
{
std::size_t actual_size = getActualAllocationSize(size);
std::size_t actual_size = getActualAllocationSize(size, align...);
CurrentMemoryTracker::allocNoThrow(actual_size);
}
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
template <std::same_as<std::align_val_t>... TAlign>
requires DB::OptionalArgument<TAlign...>
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0, TAlign... align [[maybe_unused]]) noexcept
{
try
{
#if USE_JEMALLOC
/// @note It's also possible to use je_malloc_usable_size() here.
if (likely(ptr != nullptr))
CurrentMemoryTracker::free(sallocx(ptr, 0));
{
if constexpr (sizeof...(TAlign) == 1)
CurrentMemoryTracker::free(sallocx(ptr, MALLOCX_ALIGN(alignToSizeT(align...))));
else
CurrentMemoryTracker::free(sallocx(ptr, 0));
}
#else
if (size)
CurrentMemoryTracker::free(size);
@ -103,7 +145,10 @@ inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t
#endif
}
catch (...)
{}
{
}
}
}
#pragma GCC diagnostic pop

View File

@ -10,10 +10,10 @@
/** 'mysqlxx' - very simple library for replacement of 'mysql++' library.
*
* For whatever reason, in Yandex.Metrica, back in 2008, 'mysql++' library was used.
* For whatever reason, in Metrica web analytics system, back in 2008, 'mysql++' library was used.
* There are the following shortcomings of 'mysql++':
* 1. Too rich functionality: most of it is not used.
* 2. Low performance (when used for Yandex.Metrica).
* 2. Low performance (when used for Metrica).
*
* Low performance is caused by the following reasons:
*
@ -50,7 +50,7 @@
* And for the sake of simplicity, some functions work only with certain assumptions,
* or with slightly different semantic than in mysql++.
* And we don't care about cross-platform usage of mysqlxx.
* These assumptions are specific for Yandex.Metrica. Your mileage may vary.
* These assumptions are specific for Metrica. Your mileage may vary.
*
* mysqlxx could not be considered as separate full-featured library,
* because it is developed from the principle - "everything that we don't need is not implemented".

View File

@ -1,6 +1,7 @@
#include <Common/memory.h>
#include <Common/config.h>
#include <cassert>
#include <new>
#include <Common/config.h>
#include <Common/memory.h>
#if defined(OS_DARWIN) && (USE_JEMALLOC)
/// In case of OSX jemalloc register itself as a default zone allocator.
@ -53,12 +54,24 @@ void * operator new(std::size_t size)
return Memory::newImpl(size);
}
/// Aligned throwing new: the allocation is reported to the memory tracker first,
/// then performed by Memory::newImpl() with the requested alignment.
void * operator new(std::size_t size, std::align_val_t align)
{
Memory::trackMemory(size, align);
return Memory::newImpl(size, align);
}
/// Array form of throwing new (default alignment): track, then allocate.
void * operator new[](std::size_t size)
{
Memory::trackMemory(size);
return Memory::newImpl(size);
}
/// Array form of aligned throwing new: same sequence as the aligned scalar form.
void * operator new[](std::size_t size, std::align_val_t align)
{
Memory::trackMemory(size, align);
return Memory::newImpl(size, align);
}
void * operator new(std::size_t size, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size);
@ -71,6 +84,18 @@ void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
return Memory::newNoExept(size);
}
/// Aligned non-throwing new: the allocation is reported to the memory tracker first,
/// then performed by Memory::newNoExept(), whose result is returned as is.
void * operator new(std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size, align);
return Memory::newNoExept(size, align);
}
/// Array form of aligned non-throwing new: same sequence as the scalar form.
void * operator new[](std::size_t size, std::align_val_t align, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size, align);
return Memory::newNoExept(size, align);
}
/// delete
/// C++17 std 21.6.2.1 (11)
@ -81,26 +106,51 @@ void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
/// It's unspecified whether size-aware or size-unaware version is called when deleting objects of
/// incomplete type and arrays of non-class and trivially-destructible class types.
void operator delete(void * ptr) noexcept
{
Memory::untrackMemory(ptr);
Memory::deleteImpl(ptr);
}
void operator delete(void * ptr, std::align_val_t align) noexcept
{
Memory::untrackMemory(ptr, 0, align);
Memory::deleteImpl(ptr);
}
void operator delete[](void * ptr) noexcept
{
Memory::untrackMemory(ptr);
Memory::deleteImpl(ptr);
}
void operator delete[](void * ptr, std::align_val_t align) noexcept
{
Memory::untrackMemory(ptr, 0, align);
Memory::deleteImpl(ptr);
}
void operator delete(void * ptr, std::size_t size) noexcept
{
Memory::untrackMemory(ptr, size);
Memory::deleteSized(ptr, size);
}
/// Replacement for the sized, aligned (std::align_val_t) scalar operator delete.
/// Passes pointer, size and alignment to Memory::untrackMemory, then releases through
/// Memory::deleteSized with the same size and alignment.
void operator delete(void * ptr, std::size_t size, std::align_val_t align) noexcept
{
Memory::untrackMemory(ptr, size, align);
Memory::deleteSized(ptr, size, align);
}
/// Replacement for the sized, default-aligned array operator delete.
/// Passes the pointer and the size to Memory::untrackMemory, then releases through Memory::deleteSized
/// with the same size.
void operator delete[](void * ptr, std::size_t size) noexcept
{
Memory::untrackMemory(ptr, size);
Memory::deleteSized(ptr, size);
}
/// Replacement for the sized, aligned (std::align_val_t) array operator delete.
/// Passes pointer, size and alignment to Memory::untrackMemory, then releases through
/// Memory::deleteSized with the same size and alignment.
void operator delete[](void * ptr, std::size_t size, std::align_val_t align) noexcept
{
Memory::untrackMemory(ptr, size, align);
Memory::deleteSized(ptr, size, align);
}

View File

@ -13,8 +13,8 @@ namespace DB
* Otherwise, an exception is thrown.
*
* Examples:
* yandex.ru - returns "yandex.ru" and default_port
* yandex.ru:80 - returns "yandex.ru" and 80
* clickhouse.com - returns "clickhouse.com" and default_port
* clickhouse.com:80 - returns "clickhouse.com" and 80
* [2a02:6b8:a::a]:80 - returns [2a02:6b8:a::a] and 80; note that square brackets remain in returned host.
*/
std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port);

View File

@ -62,7 +62,7 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t
{
/// Nothing to do if we are already at the required position
if (!size_compressed && static_cast<size_t>(file_in.getPosition()) == offset_in_compressed_file && /// correct position in compressed file
(offset() == offset_in_decompressed_block /// correct position in buffer or
((!buffer().empty() && offset() == offset_in_decompressed_block) /// correct position in buffer or
|| nextimpl_working_buffer_offset == offset_in_decompressed_block)) /// we will move our position to correct one
return;

View File

@ -298,7 +298,7 @@ String ServerStatCommand::run()
write("Latency min/avg/max", latency.str());
write("Received", toString(stats.getPacketsReceived()));
write("Sent ", toString(stats.getPacketsSent()));
write("Sent", toString(stats.getPacketsSent()));
write("Connections", toString(keeper_info.alive_connections_count));
write("Outstanding", toString(keeper_info.outstanding_requests_count));
write("Zxid", toString(keeper_info.last_zxid));
@ -328,7 +328,7 @@ String StatCommand::run()
write("Latency min/avg/max", latency.str());
write("Received", toString(stats.getPacketsReceived()));
write("Sent ", toString(stats.getPacketsSent()));
write("Sent", toString(stats.getPacketsSent()));
write("Connections", toString(keeper_info.alive_connections_count));
write("Outstanding", toString(keeper_info.outstanding_requests_count));
write("Zxid", toString(keeper_info.last_zxid));

View File

@ -58,7 +58,7 @@ class IColumn;
M(Milliseconds, connect_timeout_with_failover_secure_ms, 100, "Connection timeout for selecting first healthy replica (for secure connections).", 0) \
M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "", 0) \
M(Seconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, "", 0) \
M(Seconds, drain_timeout, 3, "Timeout for draining remote connections, -1 means synchronous drain w/o ignoring errors", 0) \
M(Seconds, drain_timeout, 3, "Timeout for draining remote connections, -1 means synchronous drain without ignoring errors", 0) \
M(Seconds, tcp_keep_alive_timeout, 290 /* less than DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC */, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \
M(Milliseconds, hedged_connection_timeout_ms, 100, "Connection timeout for establishing connection with replica for Hedged requests", 0) \
M(Milliseconds, receive_data_timeout_ms, 2000, "Connection timeout for receiving first packet of data or packet with positive progress from replica", 0) \
@ -574,7 +574,7 @@ class IColumn;
M(Bool, check_table_dependencies, true, "Check that DDL query (such as DROP TABLE or RENAME) will not break dependencies", 0) \
M(Bool, use_local_cache_for_remote_storage, true, "Use local cache for remote storage like HDFS or S3, it's used for remote table engine only", 0) \
\
M(Bool, allow_unrestricted_reads_from_keeper, false, "Allow unrestricted (w/o condition on path) reads from system.zookeeper table, can be handy, but is not safe for zookeeper", 0) \
M(Bool, allow_unrestricted_reads_from_keeper, false, "Allow unrestricted (without condition on path) reads from system.zookeeper table, can be handy, but is not safe for zookeeper", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
@ -582,6 +582,7 @@ class IColumn;
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
M(Bool, throw_if_no_data_to_insert, true, "Enables or disables empty INSERTs, enabled by default", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

View File

@ -191,6 +191,33 @@ static void checkASTStructure(const ASTPtr & child)
ErrorCodes::UNEXPECTED_AST_STRUCTURE);
}
static void autoAssignNumberForEnum(const ASTPtr & arguments)
{
UInt64 literal_child_count = 0;
UInt64 func_child_count = 0;
ASTs assign_number_child;
assign_number_child.reserve(arguments->children.size());
for (const ASTPtr & child : arguments->children)
{
if (child->as<ASTLiteral>())
{
ASTPtr func = makeASTFunction("equals", child, std::make_shared<ASTLiteral>(++literal_child_count));
assign_number_child.emplace_back(func);
}
else
{
++func_child_count;
assign_number_child.emplace_back(child);
}
}
if (func_child_count > 0 && literal_child_count > 0)
throw Exception("ALL Elements of Enum data type must be of form: 'name' = number or 'name', where name is string literal and number is an integer",
ErrorCodes::UNEXPECTED_AST_STRUCTURE);
arguments->children = assign_number_child;
}
template <typename DataTypeEnum>
static DataTypePtr createExact(const ASTPtr & arguments)
{
@ -202,6 +229,7 @@ static DataTypePtr createExact(const ASTPtr & arguments)
using FieldType = typename DataTypeEnum::FieldType;
autoAssignNumberForEnum(arguments);
/// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument.
for (const ASTPtr & child : arguments->children)
{
@ -236,6 +264,7 @@ static DataTypePtr create(const ASTPtr & arguments)
if (!arguments || arguments->children.empty())
throw Exception("Enum data type cannot be empty", ErrorCodes::EMPTY_DATA_PASSED);
autoAssignNumberForEnum(arguments);
/// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument.
for (const ASTPtr & child : arguments->children)
{

View File

@ -139,11 +139,6 @@ void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_st
if (!isObject(column.type))
continue;
if (!isObject(column.type))
throw Exception(ErrorCodes::TYPE_MISMATCH,
"Type for column '{}' mismatch in columns list and in block. In list: {}, in block: {}",
column.name, column.type->getName(), column.type->getName());
const auto & column_object = assert_cast<const ColumnObject &>(*column.column);
const auto & subcolumns = column_object.getSubcolumns();

View File

@ -136,10 +136,17 @@ void SerializationObject<Parser>::deserializeTextImpl(IColumn & column, Reader &
reader(buf);
std::optional<ParseResult> result;
/// Treat empty string as an empty object
/// for better CAST from String to Object.
if (!buf.empty())
{
auto parser = parsers_pool.get([] { return new Parser; });
result = parser->parse(buf.data(), buf.size());
}
else
{
result = ParseResult{};
}
if (!result)
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse object");

View File

@ -44,7 +44,7 @@ void DatabaseMemory::dropTable(
auto table = detachTableUnlocked(table_name, lock);
try
{
/// Remove table w/o lock since:
/// Remove table without lock since:
/// - it does not require it
/// - it may cause lock-order-inversion if the underlying storage needs to
/// resolve tables (like StorageLiveView)

View File

@ -7,7 +7,6 @@
#include <base/logger_useful.h>
#include <Common/LocalDateTime.h>
#include <Common/filesystemHelpers.h>
#include <Common/ShellCommand.h>
#include <Processors/Sources/ShellCommandSource.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
@ -15,12 +14,10 @@
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <Dictionaries/DictionarySourceFactory.h>
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/registerDictionaries.h>
namespace DB
@ -51,12 +48,18 @@ namespace
command,
user_scripts_path);
if (!std::filesystem::exists(std::filesystem::path(script_path)))
if (!FS::exists(script_path))
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Executable file {} does not exist inside user scripts folder {}",
command,
user_scripts_path);
if (!FS::canExecute(script_path))
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Executable file {} is not executable inside user scripts folder {}",
command,
user_scripts_path);
command = std::move(script_path);
}

View File

@ -7,7 +7,6 @@
#include <base/logger_useful.h>
#include <Common/LocalDateTime.h>
#include <Common/filesystemHelpers.h>
#include <Common/ShellCommand.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Sources/ShellCommandSource.h>
@ -20,7 +19,6 @@
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryStructure.h>
namespace DB
{
@ -113,12 +111,18 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block)
command,
user_scripts_path);
if (!std::filesystem::exists(std::filesystem::path(script_path)))
if (!FS::exists(script_path))
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Executable file {} does not exist inside user scripts folder {}",
command,
user_scripts_path);
if (!FS::canExecute(script_path))
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Executable file {} is not executable inside user scripts folder {}",
command,
user_scripts_path);
command = std::move(script_path);
}

View File

@ -649,7 +649,7 @@ void HashedDictionary<dictionary_key_type, sparse>::calculateBytesAllocated()
if constexpr (sparse || std::is_same_v<AttributeValueType, Field>)
{
/// bucket_count() - Returns table size, that includes empty and deleted
/// size() - Returns table size, w/o empty and deleted
/// size() - Returns table size, without empty and deleted
/// and since this is sparsehash, empty cells should not be significant,
/// and since items cannot be removed from the dictionary, deleted is also not important.
bytes_allocated += container.size() * (sizeof(KeyType) + sizeof(AttributeValueType));

View File

@ -288,8 +288,8 @@ ColumnUInt8::Ptr IPAddressDictionary::hasKeys(const Columns & key_columns, const
{
validateKeyTypes(key_types);
const auto first_column = key_columns.front();
const auto rows = first_column->size();
const auto & first_column = key_columns.front();
const size_t rows = first_column->size();
auto result = ColumnUInt8::create(rows);
auto & out = result->getData();
@ -613,8 +613,8 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto first_column = key_columns.front();
const auto rows = first_column->size();
const auto & first_column = key_columns.front();
const size_t rows = first_column->size();
auto & vec = std::get<ContainerType<AttributeType>>(attribute.maps);
if (const auto * ipv4_col = std::get_if<IPv4Container>(&ip_column))
@ -694,8 +694,8 @@ void IPAddressDictionary::getItemsImpl(
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto first_column = key_columns.front();
const auto rows = first_column->size();
const auto & first_column = key_columns.front();
const size_t rows = first_column->size();
// special case for getBlockInputStream
if (unlikely(key_columns.size() == 2))

View File

@ -1,8 +1,6 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#if USE_AZURE_BLOB_STORAGE

View File

@ -1,6 +1,4 @@
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#include <Disks/DiskFactory.h>

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
* <disks>
* <web>
* <type>web</type>
* <endpoint>https://clickhouse-datasets.s3.yandex.net/disk-with-static-files-tests/test-hits/</endpoint>
* <endpoint>https://clickhouse-datasets.s3.amazonaws.com/disk-with-static-files-tests/test-hits/</endpoint>
* </web>
* </disks>
* <policies>

View File

@ -46,7 +46,15 @@ CachedReadBufferFromRemoteFS::CachedReadBufferFromRemoteFS(
void CachedReadBufferFromRemoteFS::initialize(size_t offset, size_t size)
{
file_segments_holder.emplace(cache->getOrSet(cache_key, offset, size));
if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
{
file_segments_holder.emplace(cache->get(cache_key, offset, size));
}
else
{
file_segments_holder.emplace(cache->getOrSet(cache_key, offset, size));
}
/**
* Segments in returned list are ordered in ascending order and represent a full contiguous
@ -326,6 +334,10 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
#endif
size_t seek_offset = file_offset_of_buffer_end - range.left;
if (file_offset_of_buffer_end < range.left)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invariant failed. Expected {} > {} (current offset > file segment's start offset)", file_offset_of_buffer_end, range.left);
read_buffer_for_file_segment->seek(seek_offset, SEEK_SET);
break;
@ -577,6 +589,8 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
{
last_caller_id = FileSegment::getCallerId();
assertCorrectness();
if (!initialized)
initialize(file_offset_of_buffer_end, getTotalSizeToRead());
@ -597,9 +611,12 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
{
try
{
bool file_segment_already_completed = !file_segment->isDownloader();
if (!file_segment_already_completed)
bool need_complete_file_segment = file_segment->isDownloader();
if (need_complete_file_segment)
{
LOG_TEST(log, "Resetting downloader {} from scope exit", file_segment->getDownloader());
file_segment->completeBatchAndResetDownloader();
}
}
catch (...)
{
@ -820,6 +837,12 @@ std::optional<size_t> CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset()
return std::nullopt;
}
/// Sanity check: throws LOGICAL_ERROR when IFileCache::isReadOnly() reports true while the
/// read_from_filesystem_cache_if_exists_otherwise_bypass_cache setting is not enabled.
void CachedReadBufferFromRemoteFS::assertCorrectness() const
{
    /// Nothing to verify unless the cache is currently read-only.
    if (!IFileCache::isReadOnly())
        return;

    /// Read-only cache is acceptable when the buffer only reads what is already cached.
    if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
        return;

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache usage is not allowed");
}
String CachedReadBufferFromRemoteFS::getInfoForLog()
{
String implementation_buffer_read_range_str;

View File

@ -50,6 +50,8 @@ private:
bool nextImplStep();
void assertCorrectness() const;
enum class ReadType
{
CACHED,

Some files were not shown because too many files have changed in this diff Show More