Merge branch 'master' into fix_storage_distributed_ttl

This commit is contained in:
mergify[bot] 2022-04-18 17:31:59 +00:00 committed by GitHub
commit daca157111
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
182 changed files with 644 additions and 2159 deletions

View File

@ -2,7 +2,7 @@
name: Debug
'on':
[push, pull_request, release]
[push, pull_request, release, workflow_dispatch]
jobs:
DebugInfo:

View File

@ -314,6 +314,15 @@ if (ENABLE_BUILD_PATH_MAPPING)
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
endif ()
option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF)
if (ENABLE_BUILD_PROFILING)
if (COMPILER_CLANG)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace")
else ()
message (${RECONFIGURE_MESSAGE_LEVEL} "Build profiling is only available with CLang")
endif ()
endif ()
if (${CMAKE_VERSION} VERSION_LESS "3.12.4")
# CMake < 3.12 doesn't support setting 20 as a C++ standard version.
# We will add C++ standard controlling flag in CMAKE_CXX_FLAGS manually for now.

View File

@ -14,8 +14,8 @@
uint64_t getAvailableMemoryAmountOrZero()
{
#if defined(_SC_AVPHYS_PAGES) // linux
return getPageSize() * sysconf(_SC_AVPHYS_PAGES);
#if defined(_SC_PHYS_PAGES) // linux
return getPageSize() * sysconf(_SC_PHYS_PAGES);
#elif defined(__FreeBSD__)
struct vmtotal vmt;
size_t vmt_size = sizeof(vmt);

View File

@ -2,7 +2,7 @@
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/
#include <base/defines.h>

View File

@ -1,6 +1,6 @@
#pragma once
/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/
/** Collects all dl_phdr_info items and caches them in a static array.
* Also rewrites dl_iterate_phdr with a lock-free version which consults the above cache

View File

@ -76,10 +76,10 @@ public:
/// return none if daemon doesn't exist, reference to the daemon otherwise
static std::optional<std::reference_wrapper<BaseDaemon>> tryGetInstance() { return tryGetInstance<BaseDaemon>(); }
/// В Graphite компоненты пути(папки) разделяются точкой.
/// У нас принят путь формата root_path.hostname_yandex_ru.key
/// root_path по умолчанию one_min
/// key - лучше группировать по смыслу. Например "meminfo.cached" или "meminfo.free", "meminfo.total"
/// Graphite metric name has components separated by dots.
/// We used to have the following format: root_path.hostname_clickhouse_com.key
/// root_path - one_min by default
/// key - something that makes sense. Examples: "meminfo.cached" or "meminfo.free", "meminfo.total".
template <class T>
void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
{

2
contrib/sysroot vendored

@ -1 +1 @@
Subproject commit bbcac834526d90d1e764164b861be426891d1743
Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8

18
debian/.gitignore vendored
View File

@ -1,18 +0,0 @@
control
copyright
tmp/
clickhouse-benchmark/
clickhouse-client.docs
clickhouse-client/
clickhouse-common-static-dbg/
clickhouse-common-static.docs
clickhouse-common-static/
clickhouse-server-base/
clickhouse-server-common/
clickhouse-server/
debhelper-build-stamp
files
*.debhelper.log
*.debhelper
*.substvars

223
debian/.pbuilderrc vendored
View File

@ -1,223 +0,0 @@
#
# sudo apt install pbuilder fakeroot debhelper debian-archive-keyring debian-keyring
#
# ubuntu:
# prepare old (trusty or earlier) host system:
# sudo ln -s gutsy /usr/share/debootstrap/scripts/eoan
# sudo ln -s gutsy /usr/share/debootstrap/scripts/disco
# sudo ln -s gutsy /usr/share/debootstrap/scripts/cosmic
# sudo ln -s gutsy /usr/share/debootstrap/scripts/artful
# sudo ln -s gutsy /usr/share/debootstrap/scripts/bionic
# sudo ln -s sid /usr/share/debootstrap/scripts/buster
# build ubuntu:
# sudo DIST=bionic pbuilder create --configfile debian/.pbuilderrc && DIST=bionic pdebuild --configfile debian/.pbuilderrc
# sudo DIST=cosmic pbuilder create --configfile debian/.pbuilderrc && DIST=cosmic pdebuild --configfile debian/.pbuilderrc
# sudo DIST=disco pbuilder create --configfile debian/.pbuilderrc && DIST=disco pdebuild --configfile debian/.pbuilderrc
# sudo DIST=eoan pbuilder create --configfile debian/.pbuilderrc && DIST=eoan pdebuild --configfile debian/.pbuilderrc
# sudo DIST=devel pbuilder create --configfile debian/.pbuilderrc && DIST=devel pdebuild --configfile debian/.pbuilderrc
# build debian:
# sudo DIST=stable pbuilder create --configfile debian/.pbuilderrc && DIST=stable pdebuild --configfile debian/.pbuilderrc
# sudo DIST=testing pbuilder create --configfile debian/.pbuilderrc && DIST=testing pdebuild --configfile debian/.pbuilderrc
# sudo DIST=unstable pbuilder create --configfile debian/.pbuilderrc && DIST=unstable pdebuild --configfile debian/.pbuilderrc
# sudo DIST=experimental pbuilder create --configfile debian/.pbuilderrc && DIST=experimental pdebuild --configfile debian/.pbuilderrc
# build i386 experimental:
# sudo DIST=trusty ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=trusty ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=xenial ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=xenial ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=zesty ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=zesty ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=artful ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=artful ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=bionic ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=bionic ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=stable ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=stable ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=testing ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=testing ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# sudo DIST=experimental ARCH=i386 pbuilder create --configfile debian/.pbuilderrc && DIST=experimental ARCH=i386 pdebuild --configfile debian/.pbuilderrc
# test gcc-9
# env DEB_CC=gcc-9 DEB_CXX=g++-9 EXTRAPACKAGES="g++-9 gcc-9" DIST=disco pdebuild --configfile debian/.pbuilderrc
# use only clang:
# env DEB_CC=clang-8 DEB_CXX=clang++-8 EXTRAPACKAGES=clang-8 DIST=disco pdebuild --configfile debian/.pbuilderrc
# env DEB_CC=clang-5.0 DEB_CXX=clang++-5.0 EXTRAPACKAGES=clang-5.0 DIST=artful pdebuild --configfile debian/.pbuilderrc
# clang+asan:
# env DEB_CC=clang-5.0 DEB_CXX=clang++-5.0 EXTRAPACKAGES="clang-5.0 libc++abi-dev libc++-dev" CMAKE_FLAGS="-DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DCMAKE_BUILD_TYPE=Asan" DIST=artful pdebuild --configfile debian/.pbuilderrc
# clang+tsan:
# env DEB_CC=clang-5.0 DEB_CXX=clang++-5.0 EXTRAPACKAGES="clang-5.0 libc++abi-dev libc++-dev" CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Tsan" DIST=artful pdebuild --configfile debian/.pbuilderrc
# without sse for old systems and some VM:
# env DH_VERBOSE=1 CMAKE_FLAGS="-DHAVE_SSE41=0 -DHAVE_SSE42=0 -DHAVE_POPCNT=0 -DHAVE_SSE2_INTRIN=0 -DSSE2FLAG=' ' -DHAVE_SSE42_INTRIN=0 -DSSE4FLAG=' ' -DHAVE_PCLMULQDQ_INTRIN=0 -DPCLMULFLAG=' '" DIST=artful pdebuild --configfile debian/.pbuilderrc
# Note: on trusty host creating some future dists can fail (debootstrap error).
# Your packages built here: /var/cache/pbuilder/*-*/result
# from https://wiki.debian.org/PbuilderTricks :
# Codenames for Debian suites according to their alias. Update these when
# needed.
UNSTABLE_CODENAME="sid"
TESTING_CODENAME="buster"
STABLE_CODENAME="stretch"
STABLE_BACKPORTS_SUITE="$STABLE_CODENAME-backports"
# List of Debian suites.
DEBIAN_SUITES=($UNSTABLE_CODENAME $TESTING_CODENAME $STABLE_CODENAME $STABLE_BACKPORTS_SUITE
"experimental" "unstable" "testing" "stable")
# List of Ubuntu suites. Update these when needed.
UBUNTU_SUITES=("eoan" "disco" "cosmic" "bionic" "artful" "zesty" "xenial" "trusty" "devel")
# Set a default distribution if none is used. Note that you can set your own default (i.e. ${DIST:="unstable"}).
HOST_DIST=`lsb_release --short --codename`
: ${DIST:="$HOST_DIST"}
# Optionally change Debian codenames in $DIST to their aliases.
case "$DIST" in
$UNSTABLE_CODENAME)
DIST="unstable"
;;
$TESTING_CODENAME)
DIST="testing"
;;
$STABLE_CODENAME)
DIST="stable"
;;
esac
# Optionally set the architecture to the host architecture if none set. Note
# that you can set your own default (i.e. ${ARCH:="i386"}).
: ${ARCH:="$(dpkg --print-architecture)"}
NAME="$DIST"
if [ -n "${ARCH}" ]; then
NAME="$NAME-$ARCH"
DEBOOTSTRAPOPTS=("--arch" "$ARCH" "${DEBOOTSTRAPOPTS[@]}")
fi
BASETGZ=${SET_BASETGZ}
BASETGZ=${BASETGZ:="/var/cache/pbuilder/$NAME-base.tgz"}
DISTRIBUTION="$DIST"
BUILDRESULT=${SET_BUILDRESULT}
BUILDRESULT=${BUILDRESULT:="/var/cache/pbuilder/$NAME/result/"}
APTCACHE="/var/cache/pbuilder/$NAME/aptcache/"
BUILDPLACE="/var/cache/pbuilder/build/"
ALLOWUNTRUSTED=${SET_ALLOWUNTRUSTED:=${ALLOWUNTRUSTED}}
#DEBOOTSTRAPOPTS=( '--variant=buildd' $SET_DEBOOTSTRAPOPTS )
if $(echo ${DEBIAN_SUITES[@]} | grep -q $DIST); then
# Debian configuration
OSNAME=debian
MIRRORSITE=${SET_MIRRORSITE="http://deb.debian.org/$OSNAME/"}
COMPONENTS="main contrib non-free"
if $(echo "$STABLE_CODENAME stable" | grep -q $DIST); then
OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $STABLE_BACKPORTS_SUITE $COMPONENTS"
fi
# APTKEYRINGS=/usr/share/keyrings/debian-archive-keyring.gpg
case "$HOST_DIST" in
"trusty" )
DEBOOTSTRAPOPTS+=( '--no-check-gpg' )
;;
*)
DEBOOTSTRAPOPTS+=( '--keyring' '/usr/share/keyrings/debian-archive-keyring.gpg' )
# DEBOOTSTRAPOPTS+=( '--keyring' '/usr/share/keyrings/debian-keyring.gpg' )
esac
elif $(echo ${UBUNTU_SUITES[@]} | grep -q $DIST); then
# Ubuntu configuration
OSNAME=ubuntu
if [[ "$ARCH" == "amd64" || "$ARCH" == "i386" ]]; then
MIRRORSITE=${SET_MIRRORSITE="http://archive.ubuntu.com/$OSNAME/"}
else
MIRRORSITE=${SET_MIRRORSITE="http://ports.ubuntu.com/ubuntu-ports/"}
fi
COMPONENTS="main restricted universe multiverse"
OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $DIST-updates main restricted universe multiverse"
OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $DIST-security main restricted universe multiverse"
OTHERMIRROR="$OTHERMIRROR | deb $MIRRORSITE $DIST-proposed main restricted universe multiverse"
case "$DIST" in
"trusty" | "xenial" )
OTHERMIRROR="$OTHERMIRROR | deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/$OSNAME $DIST main"
ALLOWUNTRUSTED=yes
;;
esac
# deb http://apt.llvm.org/zesty/ llvm-toolchain-zesty-5.0 main
else
echo "Unknown distribution: $DIST"
exit 1
fi
echo "using $NAME $OSNAME $DIST $ARCH $LOGNAME $MIRRORSITE"
case "$DIST" in
"trusty")
# ccache broken
;;
*)
CCACHEDIR=${SET_CCACHEDIR:="/var/cache/pbuilder/ccache"}
;;
esac
# old systems with default gcc <= 6
case "$DIST" in
"trusty" | "xenial" | "stable" )
export DEB_CC=gcc-7
export DEB_CXX=g++-7
;;
esac
if [ "$ARCH" != arm64 ]; then
case "$DIST" in
# TODO: fix llvm-8 and use for "disco" and "eoan"
"experimental")
EXTRAPACKAGES+=" liblld-8-dev libclang-8-dev llvm-8-dev liblld-8 "
export CMAKE_FLAGS="-DLLVM_VERSION=8 $CMAKE_FLAGS"
;;
"eoan" | "disco" | "cosmic" | "testing" | "unstable")
EXTRAPACKAGES+=" liblld-7-dev libclang-7-dev llvm-7-dev liblld-7 "
export CMAKE_FLAGS="-DLLVM_VERSION=7 $CMAKE_FLAGS"
;;
"bionic")
EXTRAPACKAGES+=" liblld-6.0-dev libclang-6.0-dev liblld-6.0 "
export CMAKE_FLAGS="-DLLVM_VERSION=6 $CMAKE_FLAGS"
;;
"artful" )
EXTRAPACKAGES+=" liblld-5.0-dev libclang-5.0-dev liblld-5.0 "
;;
esac
else
export CMAKE_FLAGS="-DENABLE_EMBEDDED_COMPILER=0 $CMAKE_FLAGS"
fi
# Will test symbols
#EXTRAPACKAGES+=" gdb "
# For killall in pbuilder-hooks:
EXTRAPACKAGES+=" psmisc "
[[ $CCACHE_PREFIX == 'distcc' ]] && EXTRAPACKAGES+=" $CCACHE_PREFIX " && USENETWORK=yes && export DISTCC_DIR=/var/cache/pbuilder/distcc
[[ $ARCH == 'i386' ]] && EXTRAPACKAGES+=" libssl-dev "
export DEB_BUILD_OPTIONS=parallel=`nproc`
# Floating bug with permissions:
[ -n "$CCACHEDIR" ] && sudo mkdir -p $CCACHEDIR
[ -n "$CCACHEDIR" ] && sudo chmod -R a+rwx $CCACHEDIR || true
# chown -R $BUILDUSERID:$BUILDUSERID $CCACHEDIR
# Do not create source package inside pbuilder (-b)
# Use current dir to make package (by default should have src archive)
# echo "3.0 (native)" > debian/source/format
# OR
# pdebuild -b --debbuildopts "--source-option=--format=\"3.0 (native)\""
# OR
DEBBUILDOPTS="-b --source-option=--format=\"3.0 (native)\""
HOOKDIR="debian/pbuilder-hooks"
#echo "DEBOOTSTRAPOPTS=${DEBOOTSTRAPOPTS[@]}"
#echo "ALLOWUNTRUSTED=${ALLOWUNTRUSTED} OTHERMIRROR=${OTHERMIRROR}"
#echo "EXTRAPACKAGES=${EXTRAPACKAGES}"

5
debian/changelog vendored
View File

@ -1,5 +0,0 @@
clickhouse (22.1.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 09 Dec 2021 00:32:58 +0300

5
debian/changelog.in vendored
View File

@ -1,5 +0,0 @@
clickhouse (@VERSION_STRING@) unstable; urgency=low
* Modified source code
-- @AUTHOR@ <@EMAIL@> @DATE@

View File

@ -1,7 +0,0 @@
usr/bin/clickhouse-client
usr/bin/clickhouse-local
usr/bin/clickhouse-compressor
usr/bin/clickhouse-benchmark
usr/bin/clickhouse-format
usr/bin/clickhouse-obfuscator
etc/clickhouse-client/config.xml

View File

@ -1,5 +0,0 @@
usr/bin/clickhouse
usr/bin/clickhouse-odbc-bridge
usr/bin/clickhouse-library-bridge
usr/bin/clickhouse-extract-from-config
usr/share/bash-completion/completions

View File

@ -1 +0,0 @@
#*/10 * * * * root ((which service > /dev/null 2>&1 && (service clickhouse-server condstart ||:)) || /etc/init.d/clickhouse-server condstart) > /dev/null 2>&1

View File

@ -1,4 +0,0 @@
LICENSE
AUTHORS
README.md
CHANGELOG.md

View File

@ -1,227 +0,0 @@
#!/bin/sh
### BEGIN INIT INFO
# Provides: clickhouse-server
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Should-Start: $time $network
# Should-Stop: $network
# Short-Description: clickhouse-server daemon
### END INIT INFO
#
# NOTES:
# - Should-* -- script can start if the listed facilities are missing, unlike Required-*
#
# For the documentation [1]:
#
# [1]: https://wiki.debian.org/LSBInitScripts
CLICKHOUSE_USER=clickhouse
CLICKHOUSE_GROUP=${CLICKHOUSE_USER}
SHELL=/bin/bash
PROGRAM=clickhouse-server
CLICKHOUSE_GENERIC_PROGRAM=clickhouse
CLICKHOUSE_PROGRAM_ENV=""
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFDIR=/etc/$PROGRAM
CLICKHOUSE_LOGDIR=/var/log/clickhouse-server
CLICKHOUSE_LOGDIR_USER=root
CLICKHOUSE_DATADIR=/var/lib/clickhouse
if [ -d "/var/lock" ]; then
LOCALSTATEDIR=/var/lock
else
LOCALSTATEDIR=/run/lock
fi
if [ ! -d "$LOCALSTATEDIR" ]; then
mkdir -p "$LOCALSTATEDIR"
fi
CLICKHOUSE_BINDIR=/usr/bin
CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
LOCKFILE=$LOCALSTATEDIR/$PROGRAM
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
CLICKHOUSE_PIDFILE="$CLICKHOUSE_PIDDIR/$PROGRAM.pid"
# CLICKHOUSE_STOP_TIMEOUT=60 # Disabled by default. Place to /etc/default/clickhouse if you need.
# Some systems lack "flock"
command -v flock >/dev/null && FLOCK=flock
# Override defaults from optional config file
test -f /etc/default/clickhouse && . /etc/default/clickhouse
die()
{
echo $1 >&2
exit 1
}
# Check that configuration file is Ok.
check_config()
{
if [ -x "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG" ]; then
su -s $SHELL ${CLICKHOUSE_USER} -c "$CLICKHOUSE_BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure.";
fi
}
initdb()
{
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
start()
{
${CLICKHOUSE_GENERIC_PROGRAM} start --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
stop()
{
${CLICKHOUSE_GENERIC_PROGRAM} stop --pid-path "${CLICKHOUSE_PIDDIR}"
}
restart()
{
${CLICKHOUSE_GENERIC_PROGRAM} restart --user "${CLICKHOUSE_USER}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}"
}
forcestop()
{
${CLICKHOUSE_GENERIC_PROGRAM} stop --force --pid-path "${CLICKHOUSE_PIDDIR}"
}
service_or_func()
{
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
systemctl $1 $PROGRAM
else
$1
fi
}
forcerestart()
{
forcestop
# Should not use 'start' function if systemd active
service_or_func start
}
use_cron()
{
# 1. running systemd
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
return 1
fi
# 2. disabled by config
if [ -z "$CLICKHOUSE_CRONFILE" ]; then
return 2
fi
return 0
}
# returns false if cron disabled (with systemd)
enable_cron()
{
use_cron && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE"
}
# returns false if cron disabled (with systemd)
disable_cron()
{
use_cron && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE"
}
is_cron_disabled()
{
use_cron || return 0
# Assumes that either no lines are commented or all lines are commented.
# Also please note, that currently cron file for ClickHouse has only one line (but some time ago there was more).
grep -q -E '^#' "$CLICKHOUSE_CRONFILE";
}
main()
{
# See how we were called.
EXIT_STATUS=0
case "$1" in
start)
service_or_func start && enable_cron
;;
stop)
disable_cron
service_or_func stop
;;
restart)
service_or_func restart && enable_cron
;;
forcestop)
disable_cron
forcestop
;;
forcerestart)
forcerestart && enable_cron
;;
reload)
service_or_func restart
;;
condstart)
service_or_func start
;;
condstop)
service_or_func stop
;;
condrestart)
service_or_func restart
;;
condreload)
service_or_func restart
;;
initdb)
initdb
;;
enable_cron)
enable_cron
;;
disable_cron)
disable_cron
;;
*)
echo "Usage: $0 {start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}"
exit 2
;;
esac
exit $EXIT_STATUS
}
status()
{
${CLICKHOUSE_GENERIC_PROGRAM} status --pid-path "${CLICKHOUSE_PIDDIR}"
}
# Running commands without need of locking
case "$1" in
status)
status
exit 0
;;
esac
(
if $FLOCK -n 9; then
main "$@"
else
echo "Init script is already running" && exit 1
fi
) 9> $LOCKFILE

View File

@ -1,6 +0,0 @@
usr/bin/clickhouse-server
usr/bin/clickhouse-copier
usr/bin/clickhouse-report
etc/clickhouse-server/config.xml
etc/clickhouse-server/users.xml
etc/systemd/system/clickhouse-server.service

View File

@ -1,47 +0,0 @@
#!/bin/sh
set -e
# set -x
PROGRAM=clickhouse-server
CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse}
CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}}
# Please note that we don't support paths with whitespaces. This is rather ignorant.
CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR:=/etc/clickhouse-server}
CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse}
CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server}
CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
[ -f /etc/default/clickhouse ] && . /etc/default/clickhouse
if [ ! -f "/etc/debian_version" ]; then
not_deb_os=1
fi
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
# if old rc.d service present - remove it
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server remove
fi
/bin/systemctl daemon-reload
/bin/systemctl enable clickhouse-server
else
# If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-server
if [ -x "/etc/init.d/clickhouse-server" ]; then
if [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
else
echo # Other OS
fi
fi
fi
fi

View File

@ -1,27 +0,0 @@
[Unit]
Description=ClickHouse Server (analytic DBMS for big data)
Requires=network-online.target
# NOTE: that After/Wants=time-sync.target is not enough, you need to ensure
# that the time was adjusted already, if you use systemd-timesyncd you are
# safe, but if you use ntp or some other daemon, you should configure it
# additionaly.
After=time-sync.target network-online.target
Wants=time-sync.target
[Service]
Type=simple
User=clickhouse
Group=clickhouse
Restart=always
RestartSec=30
RuntimeDirectory=clickhouse-server
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
# Minus means that this file is optional.
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).
WantedBy=multi-user.target

1
debian/compat vendored
View File

@ -1 +0,0 @@
10

58
debian/control vendored
View File

@ -1,58 +0,0 @@
Source: clickhouse
Section: database
Priority: optional
Maintainer: Alexey Milovidov <milovidov@clickhouse.com>
Build-Depends: debhelper (>= 9),
cmake | cmake3,
ninja-build,
clang-13,
llvm-13,
lld-13,
libc6-dev,
tzdata
Standards-Version: 3.9.8
Package: clickhouse-client
Architecture: all
Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version})
Replaces: clickhouse-compressor
Conflicts: clickhouse-compressor
Description: Client binary for ClickHouse
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
.
This package provides clickhouse-client , clickhouse-local and clickhouse-benchmark
Package: clickhouse-common-static
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}
Suggests: clickhouse-common-static-dbg
Replaces: clickhouse-common, clickhouse-server-base
Provides: clickhouse-common, clickhouse-server-base
Description: Common files for ClickHouse
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
.
This package provides common files for both clickhouse server and client
Package: clickhouse-server
Architecture: all
Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-common-static (= ${binary:Version}), adduser
Recommends: libcap2-bin
Replaces: clickhouse-server-common, clickhouse-server-base
Provides: clickhouse-server-common
Description: Server binary for ClickHouse
ClickHouse is a column-oriented database management system
that allows generating analytical data reports in real time.
.
This package provides clickhouse common configuration files
Package: clickhouse-common-static-dbg
Architecture: any
Section: debug
Priority: optional
Depends: ${misc:Depends}
Replaces: clickhouse-common-dbg
Conflicts: clickhouse-common-dbg
Description: debugging symbols for clickhouse-common-static
This package contains the debugging symbols for clickhouse-common.

132
debian/rules vendored
View File

@ -1,132 +0,0 @@
#!/usr/bin/make -f
# -*- makefile -*-
# Uncomment this to turn on verbose mode.
export DH_VERBOSE=1
# -pie only for static mode
export DEB_BUILD_MAINT_OPTIONS=hardening=-all
# because copy_headers.sh have hardcoded path to build/include_directories.txt
BUILDDIR = obj-$(DEB_HOST_GNU_TYPE)
CURDIR = $(shell pwd)
DESTDIR = $(CURDIR)/debian/tmp
DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH)
ifeq ($(CCACHE_PREFIX),distcc)
THREADS_COUNT=$(shell distcc -j)
endif
ifeq ($(THREADS_COUNT),)
THREADS_COUNT=$(shell nproc || grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu || echo 4)
endif
DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT)
ifndef ENABLE_TESTS
CMAKE_FLAGS += -DENABLE_TESTS=0
else
# To export binaries and from deb build we do not strip them. No need to run tests in deb build as we run them in CI
DEB_BUILD_OPTIONS+= nocheck
DEB_BUILD_OPTIONS+= nostrip
endif
ifndef MAKE_TARGET
MAKE_TARGET = clickhouse-bundle
endif
CMAKE_FLAGS += -DENABLE_UTILS=0
DEB_CC ?= $(shell which gcc-11 gcc-10 gcc-9 gcc | head -n1)
DEB_CXX ?= $(shell which g++-11 g++-10 g++-9 g++ | head -n1)
ifdef DEB_CXX
DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
DEB_HOST_GNU_TYPE := $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
ifeq ($(DEB_BUILD_GNU_TYPE),$(DEB_HOST_GNU_TYPE))
CC := $(DEB_CC)
CXX := $(DEB_CXX)
else ifeq (clang,$(findstring clang,$(DEB_CXX)))
# If we crosscompile with clang, it knows what to do
CC := $(DEB_CC)
CXX := $(DEB_CXX)
else
CC := $(DEB_HOST_GNU_TYPE)-$(DEB_CC)
CXX := $(DEB_HOST_GNU_TYPE)-$(DEB_CXX)
endif
endif
ifdef CXX
CMAKE_FLAGS += -DCMAKE_CXX_COMPILER=`which $(CXX)`
endif
ifdef CC
CMAKE_FLAGS += -DCMAKE_C_COMPILER=`which $(CC)`
endif
ifndef DISABLE_NINJA
NINJA=$(shell which ninja)
ifneq ($(NINJA),)
CMAKE_FLAGS += -GNinja
export MAKE=$(NINJA) $(NINJA_FLAGS)
endif
endif
ifndef DH_VERBOSE
CMAKE_FLAGS += -DCMAKE_VERBOSE_MAKEFILE=0
endif
# Useful for bulding on low memory systems
ifndef DISABLE_PARALLEL
DH_FLAGS += --parallel
else
THREADS_COUNT = 1
endif
%:
dh $@ $(DH_FLAGS) --buildsystem=cmake
override_dh_auto_configure:
dh_auto_configure -- $(CMAKE_FLAGS)
override_dh_auto_build:
# Fix for ninja. Do not add -O.
$(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
cd $(BUILDDIR) && ctest -j$(THREADS_COUNT) -V
endif
# Disable config.guess and config.sub update
override_dh_update_autotools_config:
override_dh_clean:
rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs
dh_clean # -X contrib
override_dh_strip:
#https://www.debian.org/doc/debian-policy/ch-source.html#debian-rules-and-deb-build-options
ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS)))
dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg
endif
override_dh_install:
# Making docs
cp LICENSE debian/copyright
ln -sf clickhouse-server.docs debian/clickhouse-client.docs
ln -sf clickhouse-server.docs debian/clickhouse-common-static.docs
# systemd compatibility
mkdir -p $(DESTDIR)/etc/systemd/system/
cp debian/clickhouse-server.service $(DESTDIR)/etc/systemd/system/
dh_install --list-missing --sourcedir=$(DESTDIR)
override_dh_auto_install:
env DESTDIR=$(DESTDIR) $(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) install
override_dh_shlibdeps:
true # We depend only on libc and dh_shlibdeps gives us wrong (too strict) dependency.
override_dh_builddeb:
dh_builddeb -- -Z gzip # Older systems don't have "xz", so use "gzip" instead.

View File

@ -1 +0,0 @@
3.0 (quilt)

View File

@ -1,9 +0,0 @@
tar-ignore
tar-ignore="build_*/*"
tar-ignore="workspace/*"
tar-ignore="contrib/poco/openssl/*"
tar-ignore="contrib/poco/gradle/*"
tar-ignore="contrib/poco/Data/SQLite/*"
tar-ignore="contrib/poco/PDF/*"
compression-level=3
compression=gzip

6
debian/watch vendored
View File

@ -1,6 +0,0 @@
version=4
opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)-stable\.tar\.gz%clickhouse-$1.tar.gz%" \
https://github.com/ClickHouse/ClickHouse/tags \
(?:.*?/)?v?(\d[\d.]*)-stable\.tar\.gz debian uupdate

View File

@ -362,19 +362,6 @@ function get_profiles
clickhouse-client --port $RIGHT_SERVER_PORT --query "select 1"
}
function build_log_column_definitions
{
# FIXME This loop builds column definitons from TSVWithNamesAndTypes in an
# absolutely atrocious way. This should be done by the file() function itself.
for x in {right,left}-{addresses,{query,query-thread,trace,{async-,}metric}-log}.tsv
do
paste -d' ' \
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
done
}
# Build and analyze randomization distribution for all queries.
function analyze_queries
{
@ -382,8 +369,6 @@ rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.t
rm -rf analyze ||:
mkdir analyze analyze/tmp ||:
build_log_column_definitions
# Split the raw test output into files suitable for analysis.
# To debug calculations only for a particular test, substitute a suitable
# wildcard here, e.g. `for test_file in modulo-raw.tsv`.
@ -422,12 +407,10 @@ create table partial_query_times engine File(TSVWithNamesAndTypes,
-- Process queries that were run normally, on both servers.
create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "left-query-log.tsv.columns")');
from file('left-query-log.tsv', TSVWithNamesAndTypes);
create view right_query_log as select *
from file('right-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "right-query-log.tsv.columns")');
from file('right-query-log.tsv', TSVWithNamesAndTypes);
create view query_logs as
select 0 version, query_id, ProfileEvents,
@ -645,8 +628,6 @@ mkdir report report/tmp ||:
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv run-errors.tsv ||:
build_log_column_definitions
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
@ -1028,8 +1009,7 @@ create table unstable_query_runs engine File(TSVWithNamesAndTypes,
;
create view query_log as select *
from file('$version-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-query-log.tsv.columns")');
from file('$version-query-log.tsv', TSVWithNamesAndTypes);
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
@ -1057,8 +1037,7 @@ create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
array join v, n;
create view trace_log as select *
from file('$version-trace-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-trace-log.tsv.columns")');
from file('$version-trace-log.tsv', TSVWithNamesAndTypes);
create view addresses_src as select addr,
-- Some functions change name between builds, e.g. '__clone' or 'clone' or
@ -1067,8 +1046,7 @@ create view addresses_src as select addr,
[name, 'clone.S (filtered by script)', 'pthread_cond_timedwait (filtered by script)']
-- this line is a subscript operator of the above array
[1 + multiSearchFirstIndex(name, ['clone.S', 'pthread_cond_timedwait'])] name
from file('$version-addresses.tsv', TSVWithNamesAndTypes,
'$(cat "$version-addresses.tsv.columns")');
from file('$version-addresses.tsv', TSVWithNamesAndTypes);
create table addresses_join_$version engine Join(any, left, address) as
select addr address, name from addresses_src;
@ -1195,15 +1173,12 @@ done
function report_metrics
{
build_log_column_definitions
rm -rf metrics ||:
mkdir metrics
clickhouse-local --query "
create view right_async_metric_log as
select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes,
'$(cat right-async-metric-log.tsv.columns)')
select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes)
;
-- Use the right log as time reference because it may have higher precision.
@ -1211,8 +1186,7 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as
with (select min(event_time) from right_async_metric_log) as min_time
select metric, r.event_time - min_time event_time, l.value as left, r.value as right
from right_async_metric_log r
asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes,
'$(cat left-async-metric-log.tsv.columns)') l
asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes) l
on l.metric = r.metric and r.event_time <= l.event_time
order by metric, event_time
;

View File

@ -5,11 +5,10 @@ toc_title: HTTP Interface
# HTTP Interface {#http-interface}
The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility.
The HTTP interface lets you use ClickHouse on any platform from any programming language in a form of REST API. The HTTP interface is more limited than the native interface, but it has better language support.
By default, `clickhouse-server` listens for HTTP on port 8123 (this can be changed in the config).
Sometimes, `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. Please refer this [documentation](https://curl.se/download.html) to install it before running the examples.
HTTPS can be enabled as well with port 8443 by default.
If you make a `GET /` request without parameters, it returns 200 response code and the string which defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response) default value “Ok.” (with a line feed at the end)
@ -18,11 +17,12 @@ $ curl 'http://localhost:8123/'
Ok.
```
Sometimes, `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. Please refer this [documentation](https://curl.se/download.html) to install it before running the examples.
Web UI can be accessed here: `http://localhost:8123/play`.
![Web UI](../images/play.png)
In health-check scripts use `GET /ping` request. This handler always returns “Ok.” (with a line feed at the end). Available from version 18.12.13. See also `/replicas_status` to check replica's delay.
``` bash
@ -32,7 +32,7 @@ $ curl 'http://localhost:8123/replicas_status'
Ok.
```
Send the request as a URL query parameter, or as a POST. Or send the beginning of the query in the query parameter, and the rest in the POST (well explain later why this is necessary). The size of the URL is limited to 16 KB, so keep this in mind when sending large queries.
Send the request as a URL query parameter, or as a POST. Or send the beginning of the query in the query parameter, and the rest in the POST (well explain later why this is necessary). The size of the URL is limited to 1 MiB by default, this can be changed with the `http_max_uri_size` setting.
If successful, you receive the 200 response code and the result in the response body.
If an error occurs, you receive the 500 response code and an error description text in the response body.

View File

@ -28,6 +28,7 @@ toc_title: Adopters
| <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | 1.6 mln events/sec (2018) | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) |
| <a href="https://beeline.ru/" class="favicon">Beeline</a> | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) |
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://betterstack.com/" class="favicon">Better Stack</a> | Cloud, SaaS | Log Management | - | - | [Official Website](https://betterstack.com/logtail) |
| <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
| <a href="https://www.bilibili.com/" class="favicon">BiliBili</a> | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
@ -112,7 +113,7 @@ toc_title: Adopters
| <a href="https://nlmk.com/en/" class="favicon">NLMK</a> | Steel | Monitoring | — | — | [Article in Russian, Jan 2022](https://habr.com/en/company/nlmk/blog/645943/) |
| <a href="https://getnoc.com/" class="favicon">NOC Project</a> | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) |
| <a href="https://www.noction.com" class="favicon">Noction</a> | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability)
| <a href="https://www.ntop.org/" class="favicon">ntop</a> | Network Monitoning | Monitoring | — | — | [Official website, Jan 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) |
| <a href="https://www.ntop.org/" class="favicon">ntop</a> | Network Monitoning | Monitoring | — | — | [Official website, January 2022](https://www.ntop.org/ntop/historical-traffic-analysis-at-scale-using-clickhouse-with-ntopng/) |
| <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a> | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
| <a href="https://ok.ru" class="favicon">Ok.ru</a> | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) |
| <a href="https://omnicomm.ru/" class="favicon">Omnicomm</a> | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) |
@ -123,6 +124,7 @@ toc_title: Adopters
| <a href="https://panelbear.com/" class="favicon">Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) |
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| <a href="https://www.percona.com/" class="favicon">Percona</a> | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) |
| <a href="https://pingcap.com/" class="favicon">PingCAP</a> | Analytics | Real-Time Transactional and Analytical Processing | - | - | [GitHub, TiFlash/TiDB](https://github.com/pingcap/tiflash) |
| <a href="https://plausible.io/" class="favicon">Plausible</a> | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) |
| <a href="https://posthog.com/" class="favicon">PostHog</a> | Product Analytics | Main Product | — | — | [Release Notes, October 2020](https://posthog.com/blog/the-posthog-array-1-15-0), [Blog, November 2021](https://posthog.com/blog/how-we-turned-clickhouse-into-our-eventmansion) |
| <a href="https://postmates.com/" class="favicon">Postmates</a> | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) |
@ -159,6 +161,7 @@ toc_title: Adopters
| <a href="https://www.suning.com/" class="favicon">Suning</a> | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) |
| <a href="https://superwall.me/" class="favicon">Superwall</a> | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) |
| <a href="https://swetrix.com" class="favicon">Swetrix</a> | Analytics | Main Product | — | — | [Source code](https://github.com/swetrix/swetrix-api) |
| <a href="https://synpse.net/" class="favicon">Synpse</a> | Application Management | Main Product | - | - | [Tweet, January 2022](https://twitter.com/KRusenas/status/1483571168363880455) |
| <a href="https://www.teralytics.net/" class="favicon">Teralytics</a> | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
@ -172,6 +175,7 @@ toc_title: Adopters
| <a href="https://hello.utmstat.com/" class="favicon">UTMSTAT</a> | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) |
| <a href="https://vercel.com/" class="favicon">Vercel</a> | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 |
| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
| <a href="https://vkontech.com/" class="favicon">VKontech</a> | Distributed Systems | Migrating from MongoDB | - | - | [Blog, January 2022](https://vkontech.com/migrating-your-reporting-queries-from-a-general-purpose-db-mongodb-to-a-data-warehouse-clickhouse-performance-overview/) |
| <a href="https://www.vmware.com/" class="favicon">VMware</a> | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) |
| <a href="https://www.walmartlabs.com/" class="favicon">Walmart Labs</a> | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) |
| <a href="https://wargaming.com/en/" class="favicon">Wargaming</a> | Games | | — | — | [Interview](https://habr.com/en/post/496954/) |
@ -197,5 +201,6 @@ toc_title: Adopters
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://magenta-technology.ru/sistema-upravleniya-marshrutami-inkassacii-as-strela/" class="favicon">АС "Стрела"</a> | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) |
| <a href="https://piwik.pro/" class="favicon">Piwik PRO</a> | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) |
| <a href="https://www.deepglint.com/" class="favicon">Deepglint 格灵深瞳</a> | AI, Computer Vision | OLAP | — | — | [Official Website](https://www.deepglint.com/) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -11,10 +11,6 @@ To work with data stored on `Amazon S3` disks use [S3](../engines/table-engines/
To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
## Zero-copy Replication {#zero-copy}
ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.
## Configuring HDFS {#configuring-hdfs}
[MergeTree](../engines/table-engines/mergetree-family/mergetree.md) and [Log](../engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
@ -316,3 +312,8 @@ When loading files by `endpoint`, they must be loaded into `<endpoint>/store/` p
If URL is not reachable on disk load when the server is starting up tables, then all errors are caught. If in this case there were errors, tables can be reloaded (become visible) via `DETACH TABLE table_name` -> `ATTACH TABLE table_name`. If metadata was successfully loaded at server startup, then tables are available straight away.
Use [http_max_single_read_retries](../operations/settings/settings.md#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
## Zero-copy Replication (not ready for production) {#zero-copy}
ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.

View File

@ -182,7 +182,7 @@ Marks numbers: 0 1 2 3 4 5 6 7 8
Разреженный индекс допускает чтение лишних строк. При чтении одного диапазона первичного ключа, может быть прочитано до `index_granularity * 2` лишних строк в каждом блоке данных.
Разреженный индекс почти всегда помещаеся в оперативную память и позволяет работать с очень большим количеством строк в таблицах.
Разреженный индекс почти всегда помещается в оперативную память и позволяет работать с очень большим количеством строк в таблицах.
ClickHouse не требует уникального первичного ключа. Можно вставить много строк с одинаковым первичным ключом.

View File

@ -5,9 +5,9 @@ toc_title: Nothing
# Nothing {#nothing}
Этот тип данных предназначен только для того, чтобы представлять [NULL](../../../sql-reference/data-types/special-data-types/nothing.md), т.е. отсутствие значения.
Этот тип данных предназначен только для того, чтобы представлять [NULL](../../../sql-reference/syntax.md#null-literal), т.е. отсутствие значения.
Невозможно создать значение типа `Nothing`, поэтому он используется там, где значение не подразумевается. Например, `NULL` записывается как `Nullable(Nothing)` ([Nullable](../../../sql-reference/data-types/special-data-types/nothing.md) — это тип данных, позволяющий хранить `NULL` в таблицах). Также тип `Nothing` используется для обозначения пустых массивов:
Невозможно создать значение типа `Nothing`, поэтому он используется там, где значение не подразумевается. Например, `NULL` записывается как `Nullable(Nothing)` ([Nullable](../../../sql-reference/data-types/nullable.md) — это тип данных, позволяющий хранить `NULL` в таблицах). Также тип `Nothing` используется для обозначения пустых массивов:
``` sql
SELECT toTypeName(Array())

View File

@ -2,7 +2,7 @@
此数据类型的唯一目的是表示不是期望值的情况。 所以不能创建一个 `Nothing` 类型的值。
例如,文本 [NULL](../../../sql-reference/data-types/special-data-types/nothing.md#null-literal) 的类型为 `Nullable(Nothing)`。详情请见 [可为空](../../../sql-reference/data-types/special-data-types/nothing.md)。
例如,字面量 [NULL](../../../sql-reference/syntax.md#null-literal) 的类型为 `Nullable(Nothing)`。详情请见 [可为空](../../../sql-reference/data-types/nullable.md)。
`Nothing` 类型也可以用来表示空数组:

View File

@ -66,40 +66,40 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
using namespace DB;
namespace po = boost::program_options;
po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("help,h", "produce help message")
("input", po::value<std::string>()->value_name("INPUT"), "input file")
("output", po::value<std::string>()->value_name("OUTPUT"), "output file")
("decompress,d", "decompress")
("offset-in-compressed-file", po::value<size_t>()->default_value(0ULL), "offset to the compressed block (i.e. physical file offset)")
("offset-in-decompressed-block", po::value<size_t>()->default_value(0ULL), "offset to the decompressed block (i.e. virtual offset)")
("block-size,b", po::value<unsigned>()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size")
("hc", "use LZ4HC instead of LZ4")
("zstd", "use ZSTD instead of LZ4")
("codec", po::value<std::vector<std::string>>()->multitoken(), "use codecs combination instead of LZ4")
("level", po::value<int>(), "compression level for codecs specified via flags")
("none", "use no compression instead of LZ4")
("stat", "print block statistics of compressed data")
;
po::positional_options_description positional_desc;
positional_desc.add("input", 1);
positional_desc.add("output", 1);
po::variables_map options;
po::store(po::command_line_parser(argc, argv).options(desc).positional(positional_desc).run(), options);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options] < INPUT > OUTPUT" << std::endl;
std::cout << "Usage: " << argv[0] << " [options] INPUT OUTPUT" << std::endl;
std::cout << desc << std::endl;
return 0;
}
try
{
po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("help,h", "produce help message")
("input", po::value<std::string>()->value_name("INPUT"), "input file")
("output", po::value<std::string>()->value_name("OUTPUT"), "output file")
("decompress,d", "decompress")
("offset-in-compressed-file", po::value<size_t>()->default_value(0ULL), "offset to the compressed block (i.e. physical file offset)")
("offset-in-decompressed-block", po::value<size_t>()->default_value(0ULL), "offset to the decompressed block (i.e. virtual offset)")
("block-size,b", po::value<unsigned>()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size")
("hc", "use LZ4HC instead of LZ4")
("zstd", "use ZSTD instead of LZ4")
("codec", po::value<std::vector<std::string>>()->multitoken(), "use codecs combination instead of LZ4")
("level", po::value<int>(), "compression level for codecs specified via flags")
("none", "use no compression instead of LZ4")
("stat", "print block statistics of compressed data")
;
po::positional_options_description positional_desc;
positional_desc.add("input", 1);
positional_desc.add("output", 1);
po::variables_map options;
po::store(po::command_line_parser(argc, argv).options(desc).positional(positional_desc).run(), options);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options] < INPUT > OUTPUT" << std::endl;
std::cout << "Usage: " << argv[0] << " [options] INPUT OUTPUT" << std::endl;
std::cout << desc << std::endl;
return 0;
}
bool decompress = options.count("decompress");
bool use_lz4hc = options.count("hc");
bool use_zstd = options.count("zstd");

View File

@ -44,40 +44,40 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
{
using namespace DB;
boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("query", po::value<std::string>(), "query to format")
("help,h", "produce help message")
("hilite", "add syntax highlight with ANSI terminal escape sequences")
("oneline", "format in single line")
("quiet,q", "just check syntax, no output on success")
("multiquery,n", "allow multiple queries in the same file")
("obfuscate", "obfuscate instead of formatting")
("backslash", "add a backslash at the end of each line of the formatted query")
("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe")
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
;
Settings cmd_settings;
for (const auto & field : cmd_settings.all())
{
if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size")
cmd_settings.addProgramOption(desc, field);
}
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
po::notify(options);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl;
std::cout << desc << std::endl;
return 1;
}
try
{
boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
desc.add_options()
("query", po::value<std::string>(), "query to format")
("help,h", "produce help message")
("hilite", "add syntax highlight with ANSI terminal escape sequences")
("oneline", "format in single line")
("quiet,q", "just check syntax, no output on success")
("multiquery,n", "allow multiple queries in the same file")
("obfuscate", "obfuscate instead of formatting")
("backslash", "add a backslash at the end of each line of the formatted query")
("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe")
("seed", po::value<std::string>(), "seed (arbitrary string) that determines the result of obfuscation")
;
Settings cmd_settings;
for (const auto & field : cmd_settings.all())
{
if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size")
cmd_settings.addProgramOption(desc, field);
}
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
po::notify(options);
if (options.count("help"))
{
std::cout << "Usage: " << argv[0] << " [options] < query" << std::endl;
std::cout << desc << std::endl;
return 1;
}
bool hilite = options.count("hilite");
bool oneline = options.count("oneline");
bool quiet = options.count("quiet");

View File

@ -1231,5 +1231,5 @@ try
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
throw;
return DB::getCurrentExceptionCode();
}

View File

@ -13,6 +13,8 @@
#include <tuple>
#include <utility> /// pair
#include <fmt/format.h>
#include "config_tools.h"
#include <Common/StringUtils/StringUtils.h>
@ -332,6 +334,20 @@ struct Checker
#endif
;
void checkHarmfulEnvironmentVariables()
{
/// The list is a selection from "man ld-linux". And one variable that is Mac OS X specific.
/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
for (const auto * var : {"LD_PRELOAD", "LD_LIBRARY_PATH", "LD_ORIGIN_PATH", "LD_AUDIT", "LD_DYNAMIC_WEAK", "DYLD_INSERT_LIBRARIES"})
{
if (const char * value = getenv(var); value && value[0])
{
std::cerr << fmt::format("Environment variable {} is set to {}. It can compromise security.\n", var, value);
_exit(1);
}
}
}
}
@ -352,6 +368,8 @@ int main(int argc_, char ** argv_)
inside_main = true;
SCOPE_EXIT({ inside_main = false; });
checkHarmfulEnvironmentVariables();
/// Reset new handler to default (that throws std::bad_alloc)
/// It is needed because LLVM library clobbers it.
std::set_new_handler(nullptr);

View File

@ -1716,6 +1716,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
return Application::EXIT_OK;
}
void Server::createServers(
Poco::Util::AbstractConfiguration & config,
const std::vector<std::string> & listen_hosts,

View File

@ -589,7 +589,7 @@
stats.innerText = `Elapsed: ${seconds} sec, read ${formatted_rows} rows, ${formatted_bytes}.`;
/// We can also render graphs if user performed EXPLAIN PIPELINE graph=1 or EXPLAIN AST graph = 1
if (response.data.length > 3 && response.data[0][0].startsWith("digraph") && document.getElementById('query').value.match(/^\s*EXPLAIN/i)) {
if (response.data.length > 3 && document.getElementById('query').value.match(/^\s*EXPLAIN/i) && typeof(response.data[0][0]) === "string" && response.data[0][0].startsWith("digraph")) {
renderGraph(response);
} else {
renderTable(response);

99
release
View File

@ -1,99 +0,0 @@
#!/usr/bin/env bash
# If you have "no space left" error, you can change the location of temporary files with BUILDPLACE environment variable.
# Version increment:
# Default release: 18.1.2 -> 18.2.0:
# ./release --version
# or
# ./release --version minor
# Bugfix release (only with small patches to previous release): 18.1.2 -> 18.1.3:
# ./release --version patch
# Do this once per year: 18.1.2 -> 19.0.0:
# ./release --version major
set -e
# Avoid dependency on locale
LC_ALL=C
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd $CUR_DIR
source "./utils/release/release_lib.sh"
DEBUILD_NOSIGN_OPTIONS="-us -uc"
DEBUILD_NODEPS_OPTIONS="-d"
if [ -z "$VERSION_STRING" ] ; then
get_revision_author
fi
while [[ $1 == --* ]]
do
if [[ $1 == '--test' ]]; then
TEST='yes'
VERSION_POSTFIX+=+test
shift
elif [[ $1 == '--check-build-dependencies' ]]; then
DEBUILD_NODEPS_OPTIONS=""
shift
elif [[ $1 == '--version' ]]; then
gen_revision_author $2
exit 0
elif [[ $1 == '--rpm' ]]; then
MAKE_RPM=1
shift
elif [[ $1 == '--tgz' ]]; then
MAKE_TGZ=1
shift
else
echo "Unknown option $1"
exit 2
fi
done
# Build options
if [ -n "$SANITIZER" ]
then
if [[ "$SANITIZER" == "address" ]]; then VERSION_POSTFIX+="+asan"
elif [[ "$SANITIZER" == "thread" ]]; then VERSION_POSTFIX+="+tsan"
elif [[ "$SANITIZER" == "memory" ]]; then VERSION_POSTFIX+="+msan"
elif [[ "$SANITIZER" == "undefined" ]]; then VERSION_POSTFIX+="+ubsan"
else
echo "Unknown value of SANITIZER variable: $SANITIZER"
exit 3
fi
export DEB_CC=${DEB_CC=clang-10}
export DEB_CXX=${DEB_CXX=clang++-10}
EXTRAPACKAGES="$EXTRAPACKAGES clang-10 lld-10"
elif [[ $BUILD_TYPE == 'debug' ]]; then
CMAKE_BUILD_TYPE=Debug
VERSION_POSTFIX+="+debug"
fi
CMAKE_FLAGS=" $MALLOC_OPTS -DSANITIZE=$SANITIZER -DENABLE_CHECK_HEAVY_BUILDS=1 $CMAKE_FLAGS"
[[ -n "$CMAKE_BUILD_TYPE" ]] && CMAKE_FLAGS=" -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE $CMAKE_FLAGS"
export CMAKE_FLAGS
export EXTRAPACKAGES
VERSION_STRING+=$VERSION_POSTFIX
echo -e "\nCurrent version is $VERSION_STRING"
if [ -z "$NO_BUILD" ] ; then
gen_changelog "$VERSION_STRING" "" "$AUTHOR" ""
# Build (only binary packages).
debuild --preserve-env -e PATH \
-e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e CMAKE_FLAGS="$CMAKE_FLAGS" \
-b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS} ${DEB_ARCH_FLAG}
fi
if [ -n "$MAKE_RPM" ]; then
make_rpm
fi
if [ -n "$MAKE_TGZ" ]; then
make_tgz
fi

View File

@ -327,7 +327,8 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type)
/// Create the 'need_rebuild_lists.mark' file.
/// This file will be used later to find out if writing lists is successful or not.
std::ofstream{getNeedRebuildListsMarkFilePath(directory_path)};
std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)};
out.close();
lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this};
lists_writing_thread_is_waiting = true;

View File

@ -39,7 +39,7 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
}
template <class First, class ... TArgs>
static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
{
const WhichDataType which(second_type);
@ -51,7 +51,7 @@ static IAggregateFunction * create(const IDataType & second_type, TArgs && ... a
// Not using helper functions because there are no templates for binary decimal/numeric function.
template <class... TArgs>
static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
{
const WhichDataType which(first_type);

View File

@ -30,7 +30,7 @@ AggregateFunctionPtr createAggregateFunctionDeltaSum(
throw Exception("Incorrect number of arguments for aggregate function " + name,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
DataTypePtr data_type = arguments[0];
const DataTypePtr & data_type = arguments[0];
if (isInteger(data_type) || isFloat(data_type))
return AggregateFunctionPtr(createWithNumericType<AggregationFunctionDeltaSum>(

View File

@ -20,7 +20,7 @@ namespace
{
template <template <typename, typename> class AggregateFunctionTemplate, typename Data, typename ... TArgs>
static IAggregateFunction * createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
IAggregateFunction * createWithNumericOrTimeType(const IDataType & argument_type, TArgs && ... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::Date) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);

View File

@ -19,7 +19,7 @@ namespace ErrorCodes
namespace
{
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> typename Data, typename... TArgs>
static IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs &&... args)
IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs &&... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::UInt8) return new AggregateFunctionTemplate<UInt8, Data<UInt8>>(std::forward<TArgs>(args)...);

View File

@ -40,7 +40,7 @@ public:
};
template <typename HasLimit, typename ... TArgs>
static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, TArgs && ... args)
IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, TArgs && ... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::Date) return new AggregateFunctionGroupUniqArrayDate<HasLimit>(argument_type, std::forward<TArgs>(args)...);

View File

@ -56,7 +56,7 @@ private:
/// The name of the nested function, including combinators (i.e. *If)
///
/// getName() from the nested_function cannot be used because in case of *If combinator
/// with Nullable argument nested_function will point to the function w/o combinator.
/// with Nullable argument nested_function will point to the function without combinator.
/// (I.e. sumIf(Nullable, 1) -> sum()), and distributed query processing will fail.
///
/// And nested_function cannot point to the function with *If since

View File

@ -60,7 +60,7 @@ template <typename Value, bool float_return> using FuncQuantileBFloat16Weighted
template <typename Value, bool float_return> using FuncQuantilesBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, true>;
template <template <typename, bool> class Function>
static constexpr bool supportDecimal()
constexpr bool supportDecimal()
{
return std::is_same_v<Function<Float32, false>, FuncQuantile<Float32, false>> ||
std::is_same_v<Function<Float32, false>, FuncQuantiles<Float32, false>> ||
@ -75,7 +75,7 @@ static constexpr bool supportDecimal()
}
template <template <typename, bool> class Function>
static constexpr bool supportBigInt()
constexpr bool supportBigInt()
{
return std::is_same_v<Function<Float32, false>, FuncQuantile<Float32, false>> ||
std::is_same_v<Function<Float32, false>, FuncQuantiles<Float32, false>> ||

View File

@ -19,7 +19,7 @@ namespace
{
template <template <typename, typename> class AggregateFunctionTemplate, typename Data, typename ... TArgs>
static IAggregateFunction * createWithUIntegerOrTimeType(const std::string & name, const IDataType & argument_type, TArgs && ... args)
IAggregateFunction * createWithUIntegerOrTimeType(const std::string & name, const IDataType & argument_type, TArgs && ... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::Date || which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);

View File

@ -24,7 +24,7 @@ AggregateFunctionPtr createAggregateFunctionStatisticsUnary(
assertUnary(name, argument_types);
AggregateFunctionPtr res;
DataTypePtr data_type = argument_types[0];
const DataTypePtr & data_type = argument_types[0];
if (isDecimal(data_type))
res.reset(createWithDecimalType<FunctionTemplate>(*data_type, *data_type, argument_types));
else

View File

@ -56,7 +56,7 @@ AggregateFunctionPtr createAggregateFunctionSum(const std::string & name, const
assertUnary(name, argument_types);
AggregateFunctionPtr res;
DataTypePtr data_type = argument_types[0];
const DataTypePtr & data_type = argument_types[0];
if (isDecimal(data_type))
res.reset(createWithDecimalType<Function>(*data_type, *data_type, argument_types));
else

View File

@ -29,7 +29,7 @@ createAggregateFunctionSumCount(const std::string & name, const DataTypes & argu
assertUnary(name, argument_types);
AggregateFunctionPtr res;
DataTypePtr data_type = argument_types[0];
const DataTypePtr & data_type = argument_types[0];
if (!allowType(data_type))
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -44,7 +44,7 @@ class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateT
template <bool is_weighted>
static IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 load_factor, const Array & params)
IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 load_factor, const Array & params)
{
if (argument_types.empty())
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Got empty arguments list");

View File

@ -59,7 +59,7 @@
/** This hash function is not the most optimal, but UniquesHashSet states counted with it,
* stored in many places on disks (in the Yandex.Metrika), so it continues to be used.
* stored in many places on disks (in many companies), so it continues to be used.
*/
struct UniquesHashSetDefaultHash
{

View File

@ -1058,7 +1058,13 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars
/// Process the query that requires transferring data blocks to the server.
const auto parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && std_in.eof())))
throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT);
{
const auto & settings = global_context->getSettingsRef();
if (settings.throw_if_no_data_to_insert)
throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT);
else
return;
}
connection->sendQuery(
connection_parameters.timeouts,
@ -1649,7 +1655,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
catch (...)
{
// Surprisingly, this is a client error. A server error would
// have been reported w/o throwing (see onReceiveSeverException()).
// have been reported without throwing (see onReceiveSeverException()).
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
have_error = true;
}
@ -1692,7 +1698,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
if (!test_hint.clientError() && !test_hint.serverError())
{
// No error was expected but it still occurred. This is the
// default case w/o test hint, doesn't need additional
// default case without test hint, doesn't need additional
// diagnostics.
error_matches_hint = false;
}

View File

@ -55,7 +55,7 @@ public:
/// 2) Determine the includes file from the config: <include_from>/path2/metrika.xml</include_from>
/// If this path is not configured, use /etc/metrika.xml
/// 3) Replace elements matching the "<foo incl="bar"/>" pattern with
/// "<foo>contents of the yandex/bar element in metrika.xml</foo>"
/// "<foo>contents of the clickhouse/bar element in metrika.xml</foo>"
/// 4) If zk_node_cache is non-NULL, replace elements matching the "<foo from_zk="/bar">" pattern with
/// "<foo>contents of the /bar ZooKeeper node</foo>".
/// If has_zk_includes is non-NULL and there are such elements, set has_zk_includes to true.
@ -137,4 +137,3 @@ private:
};
}

View File

@ -89,7 +89,7 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
/// NOTE:
/// - Poco::Net::DNS::resolveOne(host) doesn't work for IP addresses like 127.0.0.2
/// - Poco::Net::IPAddress::tryParse() expect hex string for IPv6 (w/o brackets)
/// - Poco::Net::IPAddress::tryParse() expect hex string for IPv6 (without brackets)
if (host.starts_with('['))
{
assert(host.ends_with(']'));

View File

@ -163,6 +163,11 @@ bool FileSegment::isDownloader() const
return getCallerId() == downloader_id;
}
bool FileSegment::isDownloaderImpl(std::lock_guard<std::mutex> & /* segment_lock */) const
{
return getCallerId() == downloader_id;
}
FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader()
{
if (!isDownloader())
@ -397,6 +402,9 @@ bool FileSegment::reserve(size_t size)
void FileSegment::setDownloaded(std::lock_guard<std::mutex> & /* segment_lock */)
{
if (is_downloaded)
return;
download_state = State::DOWNLOADED;
is_downloaded = true;
downloader_id.clear();
@ -426,8 +434,7 @@ void FileSegment::completeBatchAndResetDownloader()
{
std::lock_guard segment_lock(mutex);
bool is_downloader = downloader_id == getCallerId();
if (!is_downloader)
if (!isDownloaderImpl(segment_lock))
{
cv.notify_all();
throw Exception(
@ -448,7 +455,7 @@ void FileSegment::complete(State state)
std::lock_guard cache_lock(cache->mutex);
std::lock_guard segment_lock(mutex);
bool is_downloader = downloader_id == getCallerId();
bool is_downloader = isDownloaderImpl(segment_lock);
if (!is_downloader)
{
cv.notify_all();
@ -465,6 +472,9 @@ void FileSegment::complete(State state)
"Cannot complete file segment with state: {}", stateToString(state));
}
if (state == State::DOWNLOADED)
setDownloaded(segment_lock);
download_state = state;
assertNotDetached();
@ -475,7 +485,7 @@ void FileSegment::complete(State state)
}
catch (...)
{
if (!downloader_id.empty() && downloader_id == getCallerIdImpl())
if (!downloader_id.empty() && is_downloader)
downloader_id.clear();
cv.notify_all();
@ -492,8 +502,12 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
if (download_state == State::SKIP_CACHE || detached)
return;
if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size())
if (isDownloaderImpl(segment_lock)
&& download_state != State::DOWNLOADED
&& getDownloadedSize(segment_lock) == range().size())
{
setDownloaded(segment_lock);
}
assertNotDetached();
@ -502,7 +516,7 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
/// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the
/// downloader or the only owner of the segment.
bool can_update_segment_state = downloader_id == getCallerIdImpl()
bool can_update_segment_state = isDownloaderImpl(segment_lock)
|| cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
if (can_update_segment_state)
@ -515,7 +529,7 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
}
catch (...)
{
if (!downloader_id.empty() && downloader_id == getCallerIdImpl())
if (!downloader_id.empty() && isDownloaderImpl(segment_lock))
downloader_id.clear();
cv.notify_all();
@ -561,7 +575,7 @@ void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lo
}
}
if (!downloader_id.empty() && (downloader_id == getCallerIdImpl() || is_last_holder))
if (!downloader_id.empty() && (isDownloaderImpl(segment_lock) || is_last_holder))
{
LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state));
downloader_id.clear();

View File

@ -154,6 +154,7 @@ private:
void setDownloaded(std::lock_guard<std::mutex> & segment_lock);
void setDownloadFailed(std::lock_guard<std::mutex> & segment_lock);
bool isDownloaderImpl(std::lock_guard<std::mutex> & segment_lock) const;
void wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard<std::mutex> & segment_lock) const;

View File

@ -13,7 +13,7 @@
*
* Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times.
* This is because of following reasons:
* - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
* - in Metrica web analytics system, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
* - in typical implementation of standard library, hash function for integers is trivial and just use lower bits;
* - traffic is non-uniformly distributed across a day;
* - we are using open-addressing linear probing hash tables that are most critical to hash function quality,

View File

@ -28,7 +28,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
* in which all metacharacters are escaped,
* and also if there are no '|' outside the brackets,
* and also avoid substrings of the form `http://` or `www` and some other
* (this is the hack for typical use case in Yandex.Metrica).
* (this is the hack for typical use case in web analytics applications).
*/
const char * begin = regexp.data();
const char * pos = begin;

View File

@ -31,7 +31,7 @@ namespace Util
/// context can't own, as Context is destroyed before logger,
/// and logger lives longer and logging can still happen after Context destruction.
/// resetting masker in the logger at the moment of
/// context destruction can't be done w/o synchronization / locks in a safe manner.
/// context destruction can't be done without synchronization / locks in a safe manner.
///
/// logger is Poco derived and i didn't want to brake it's interface,
/// also logger can be dynamically reconfigured without server restart,

View File

@ -1,4 +1,4 @@
# These files are located in separate library, because they are used by Yandex.Metrika code
# These files are located in separate library, because they are used by separate products
# in places when no dependency on whole "dbms" library is possible.
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")

View File

@ -100,7 +100,7 @@ enum ComputeWidthMode
};
template <ComputeWidthMode mode>
static size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
{
UTF8Decoder decoder;
size_t width = 0;

View File

@ -45,7 +45,7 @@ struct ZooKeeperResponse : virtual Response
using ZooKeeperResponsePtr = std::shared_ptr<ZooKeeperResponse>;
/// Exposed in header file for Yandex.Metrica code.
/// Exposed in header file for some external code.
struct ZooKeeperRequest : virtual Request
{
XID xid = 0;

View File

@ -7,8 +7,5 @@ target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zoo
add_executable(zkutil_test_async zkutil_test_async.cpp)
target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log)
add_executable (zk_many_watches_reconnect zk_many_watches_reconnect.cpp)
target_link_libraries (zk_many_watches_reconnect PRIVATE clickhouse_common_zookeeper_no_log clickhouse_common_config)
add_executable (zookeeper_impl zookeeper_impl.cpp)
target_link_libraries (zookeeper_impl PRIVATE clickhouse_common_zookeeper_no_log)

View File

@ -1,66 +0,0 @@
#include <Common/Config/ConfigProcessor.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Poco/Event.h>
#include <iostream>
/// A tool for reproducing https://issues.apache.org/jira/browse/ZOOKEEPER-706
/// Original libzookeeper can't reconnect the session if the length of SET_WATCHES message
/// exceeds jute.maxbuffer (0xfffff by default).
/// This happens when the number of watches exceeds ~29000.
///
/// Session reconnect can be caused by forbidding packets to the current zookeeper server, e.g.
/// sudo ip6tables -A OUTPUT -d mtzoo01it.haze.yandex.net -j REJECT
const size_t N_THREADS = 100;
int main(int argc, char ** argv)
{
try
{
if (argc != 3)
{
std::cerr << "usage: " << argv[0] << " <zookeeper_config> <number_of_watches>" << std::endl;
return 3;
}
DB::ConfigProcessor processor(argv[1], false, true);
auto config = processor.loadConfig().configuration;
zkutil::ZooKeeper zk(*config, "zookeeper", nullptr);
zkutil::EventPtr watch = std::make_shared<Poco::Event>();
/// NOTE: setting watches in multiple threads because doing it in a single thread is too slow.
size_t watches_per_thread = std::stoull(argv[2]) / N_THREADS;
std::vector<std::thread> threads;
for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread)
{
threads.emplace_back([&, i_thread]
{
for (size_t i = 0; i < watches_per_thread; ++i)
zk.exists("/clickhouse/nonexistent_node" + std::to_string(i * N_THREADS + i_thread), nullptr, watch);
});
}
for (size_t i_thread = 0; i_thread < N_THREADS; ++i_thread)
threads[i_thread].join();
while (true)
{
std::cerr << "WAITING..." << std::endl;
sleep(10);
}
}
catch (Poco::Exception & e)
{
std::cerr << "Exception: " << e.displayText() << std::endl;
return 1;
}
catch (std::exception & e)
{
std::cerr << "std::exception: " << e.what() << std::endl;
return 3;
}
catch (...)
{
std::cerr << "Some exception" << std::endl;
return 2;
}
}

View File

@ -5,82 +5,63 @@
# include <cmath>
# include <fstream>
#endif
#if USE_CPUID
# include <libcpuid/libcpuid.h>
#endif
#include <thread>
#if defined(OS_LINUX)
unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count)
static int readFrom(const char * filename, int default_value)
{
// Try to look at cgroups limit if it is available.
auto read_from = [](const char * filename, int default_value) -> int {
std::ifstream infile(filename);
if (!infile.is_open())
{
return default_value;
}
int idata;
if (infile >> idata)
return idata;
else
return default_value;
};
std::ifstream infile(filename);
if (!infile.is_open())
return default_value;
int idata;
if (infile >> idata)
return idata;
else
return default_value;
}
/// Try to look at cgroups limit if it is available.
static unsigned getCGroupLimitedCPUCores(unsigned default_cpu_count)
{
unsigned quota_count = default_cpu_count;
// Return the number of milliseconds per period process is guaranteed to run.
// -1 for no quota
int cgroup_quota = read_from("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1);
int cgroup_period = read_from("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1);
/// Return the number of milliseconds per period process is guaranteed to run.
/// -1 for no quota
int cgroup_quota = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", -1);
int cgroup_period = readFrom("/sys/fs/cgroup/cpu/cpu.cfs_period_us", -1);
if (cgroup_quota > -1 && cgroup_period > 0)
{
quota_count = ceil(static_cast<float>(cgroup_quota) / static_cast<float>(cgroup_period));
}
return std::min(default_cpu_count, quota_count);
}
#endif // OS_LINUX
#endif
static unsigned getNumberOfPhysicalCPUCoresImpl()
{
unsigned cpu_count = std::thread::hardware_concurrency();
/// Most of x86_64 CPUs have 2-way Hyper-Threading
/// Aarch64 and RISC-V don't have SMT so far.
/// POWER has SMT and it can be multiple way (like 8-way), but we don't know how ClickHouse really behaves, so use all of them.
#if defined(__x86_64__)
/// Let's limit ourself to the number of physical cores.
/// But if the number of logical cores is small - maybe it is a small machine
/// or very limited cloud instance and it is reasonable to use all the cores.
if (cpu_count >= 8)
cpu_count /= 2;
#endif
#if defined(OS_LINUX)
cpu_count = getCGroupLimitedCPUCores(cpu_count);
#endif
return cpu_count;
}
unsigned getNumberOfPhysicalCPUCores()
{
static const unsigned number = [] {
unsigned cpu_count = 0; // start with an invalid num
#if USE_CPUID
do
{
cpu_raw_data_t raw_data;
cpu_id_t data;
/// On Xen VMs, libcpuid returns wrong info (zero number of cores). Fallback to alternative method.
/// Also, libcpuid does not support some CPUs like AMD Hygon C86 7151.
if (0 != cpuid_get_raw_data(&raw_data) || 0 != cpu_identify(&raw_data, &data) || data.num_logical_cpus == 0)
{
// Just fallback
break;
}
cpu_count = data.num_cores * data.total_logical_cpus / data.num_logical_cpus;
/// Also, libcpuid gives strange result on Google Compute Engine VMs.
/// Example:
/// num_cores = 12, /// number of physical cores on current CPU socket
/// total_logical_cpus = 1, /// total number of logical cores on all sockets
/// num_logical_cpus = 24. /// number of logical cores on current CPU socket
/// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1.
} while (false);
#endif
/// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system.
/// (Actually, only Aarch64 is supported).
if (cpu_count == 0)
cpu_count = std::thread::hardware_concurrency();
#if defined(OS_LINUX)
/// TODO: add a setting for disabling that, similar to UseContainerSupport in java
cpu_count = getCGroupLimitedCPUCores(cpu_count);
#endif // OS_LINUX
return cpu_count;
}();
return number;
/// Calculate once.
static auto res = getNumberOfPhysicalCPUCoresImpl();
return res;
}

View File

@ -10,10 +10,10 @@
/** 'mysqlxx' - very simple library for replacement of 'mysql++' library.
*
* For whatever reason, in Yandex.Metrica, back in 2008, 'mysql++' library was used.
* For whatever reason, in Metrica web analytics system, back in 2008, 'mysql++' library was used.
* There are the following shortcomings of 'mysql++':
* 1. Too rich functionality: most of it is not used.
* 2. Low performance (when used for Yandex.Metrica).
* 2. Low performance (when used for Metrica).
*
* Low performance is caused by the following reasons:
*
@ -50,7 +50,7 @@
* And for the sake of simplicity, some functions work only with certain assumptions,
* or with slightly different semantic than in mysql++.
* And we don't care about cross-platform usage of mysqlxx.
* These assumptions are specific for Yandex.Metrica. Your mileage may vary.
* These assumptions are specific for Metrica. Your mileage may vary.
*
* mysqlxx could not be considered as separate full-featured library,
* because it is developed from the principle - "everything that we don't need is not implemented".

View File

@ -13,8 +13,8 @@ namespace DB
* Otherwise, an exception is thrown.
*
* Examples:
* yandex.ru - returns "yandex.ru" and default_port
* yandex.ru:80 - returns "yandex.ru" and 80
* clickhouse.com - returns "clickhouse.com" and default_port
* clickhouse.com:80 - returns "clickhouse.com" and 80
* [2a02:6b8:a::a]:80 - returns [2a02:6b8:a::a] and 80; note that square brackets remain in returned host.
*/
std::pair<std::string, UInt16> parseAddress(const std::string & str, UInt16 default_port);

View File

@ -298,7 +298,7 @@ String ServerStatCommand::run()
write("Latency min/avg/max", latency.str());
write("Received", toString(stats.getPacketsReceived()));
write("Sent ", toString(stats.getPacketsSent()));
write("Sent", toString(stats.getPacketsSent()));
write("Connections", toString(keeper_info.alive_connections_count));
write("Outstanding", toString(keeper_info.outstanding_requests_count));
write("Zxid", toString(keeper_info.last_zxid));
@ -328,7 +328,7 @@ String StatCommand::run()
write("Latency min/avg/max", latency.str());
write("Received", toString(stats.getPacketsReceived()));
write("Sent ", toString(stats.getPacketsSent()));
write("Sent", toString(stats.getPacketsSent()));
write("Connections", toString(keeper_info.alive_connections_count));
write("Outstanding", toString(keeper_info.outstanding_requests_count));
write("Zxid", toString(keeper_info.last_zxid));

View File

@ -58,7 +58,7 @@ class IColumn;
M(Milliseconds, connect_timeout_with_failover_secure_ms, 100, "Connection timeout for selecting first healthy replica (for secure connections).", 0) \
M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "", 0) \
M(Seconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, "", 0) \
M(Seconds, drain_timeout, 3, "Timeout for draining remote connections, -1 means synchronous drain w/o ignoring errors", 0) \
M(Seconds, drain_timeout, 3, "Timeout for draining remote connections, -1 means synchronous drain without ignoring errors", 0) \
M(Seconds, tcp_keep_alive_timeout, 290 /* less than DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC */, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \
M(Milliseconds, hedged_connection_timeout_ms, 100, "Connection timeout for establishing connection with replica for Hedged requests", 0) \
M(Milliseconds, receive_data_timeout_ms, 2000, "Connection timeout for receiving first packet of data or packet with positive progress from replica", 0) \
@ -574,7 +574,7 @@ class IColumn;
M(Bool, check_table_dependencies, true, "Check that DDL query (such as DROP TABLE or RENAME) will not break dependencies", 0) \
M(Bool, use_local_cache_for_remote_storage, true, "Use local cache for remote storage like HDFS or S3, it's used for remote table engine only", 0) \
\
M(Bool, allow_unrestricted_reads_from_keeper, false, "Allow unrestricted (w/o condition on path) reads from system.zookeeper table, can be handy, but is not safe for zookeeper", 0) \
M(Bool, allow_unrestricted_reads_from_keeper, false, "Allow unrestricted (without condition on path) reads from system.zookeeper table, can be handy, but is not safe for zookeeper", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
@ -582,6 +582,7 @@ class IColumn;
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
M(Bool, throw_if_no_data_to_insert, true, "Enables or disables empty INSERTs, disable by default", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

View File

@ -191,6 +191,33 @@ static void checkASTStructure(const ASTPtr & child)
ErrorCodes::UNEXPECTED_AST_STRUCTURE);
}
static void autoAssignNumberForEnum(const ASTPtr & arguments)
{
UInt64 literal_child_count = 0;
UInt64 func_child_count = 0;
ASTs assign_number_child;
assign_number_child.reserve(arguments->children.size());
for (const ASTPtr & child : arguments->children)
{
if (child->as<ASTLiteral>())
{
ASTPtr func = makeASTFunction("equals", child, std::make_shared<ASTLiteral>(++literal_child_count));
assign_number_child.emplace_back(func);
}
else
{
++func_child_count;
assign_number_child.emplace_back(child);
}
}
if (func_child_count > 0 && literal_child_count > 0)
throw Exception("ALL Elements of Enum data type must be of form: 'name' = number or 'name', where name is string literal and number is an integer",
ErrorCodes::UNEXPECTED_AST_STRUCTURE);
arguments->children = assign_number_child;
}
template <typename DataTypeEnum>
static DataTypePtr createExact(const ASTPtr & arguments)
{
@ -202,6 +229,7 @@ static DataTypePtr createExact(const ASTPtr & arguments)
using FieldType = typename DataTypeEnum::FieldType;
autoAssignNumberForEnum(arguments);
/// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument.
for (const ASTPtr & child : arguments->children)
{
@ -236,6 +264,7 @@ static DataTypePtr create(const ASTPtr & arguments)
if (!arguments || arguments->children.empty())
throw Exception("Enum data type cannot be empty", ErrorCodes::EMPTY_DATA_PASSED);
autoAssignNumberForEnum(arguments);
/// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument.
for (const ASTPtr & child : arguments->children)
{

View File

@ -44,7 +44,7 @@ void DatabaseMemory::dropTable(
auto table = detachTableUnlocked(table_name, lock);
try
{
/// Remove table w/o lock since:
/// Remove table without lock since:
/// - it does not require it
/// - it may cause lock-order-inversion if underlying storage need to
/// resolve tables (like StorageLiveView)

View File

@ -649,7 +649,7 @@ void HashedDictionary<dictionary_key_type, sparse>::calculateBytesAllocated()
if constexpr (sparse || std::is_same_v<AttributeValueType, Field>)
{
/// bucket_count() - Returns table size, that includes empty and deleted
/// size() - Returns table size, w/o empty and deleted
/// size() - Returns table size, without empty and deleted
/// and since this is sparsehash, empty cells should not be significant,
/// and since items cannot be removed from the dictionary, deleted is also not important.
bytes_allocated += container.size() * (sizeof(KeyType) + sizeof(AttributeValueType));

View File

@ -288,8 +288,8 @@ ColumnUInt8::Ptr IPAddressDictionary::hasKeys(const Columns & key_columns, const
{
validateKeyTypes(key_types);
const auto first_column = key_columns.front();
const auto rows = first_column->size();
const auto & first_column = key_columns.front();
const size_t rows = first_column->size();
auto result = ColumnUInt8::create(rows);
auto & out = result->getData();
@ -613,8 +613,8 @@ void IPAddressDictionary::getItemsByTwoKeyColumnsImpl(
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto first_column = key_columns.front();
const auto rows = first_column->size();
const auto & first_column = key_columns.front();
const size_t rows = first_column->size();
auto & vec = std::get<ContainerType<AttributeType>>(attribute.maps);
if (const auto * ipv4_col = std::get_if<IPv4Container>(&ip_column))
@ -694,8 +694,8 @@ void IPAddressDictionary::getItemsImpl(
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const
{
const auto first_column = key_columns.front();
const auto rows = first_column->size();
const auto & first_column = key_columns.front();
const size_t rows = first_column->size();
// special case for getBlockInputStream
if (unlikely(key_columns.size() == 2))

View File

@ -1,8 +1,6 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#if USE_AZURE_BLOB_STORAGE

View File

@ -1,6 +1,4 @@
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#include <Disks/DiskFactory.h>

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
* <disks>
* <web>
* <type>web</type>
* <endpoint>https://clickhouse-datasets.s3.yandex.net/disk-with-static-files-tests/test-hits/</endpoint>
* <endpoint>https://clickhouse-datasets.s3.amazonaws.com/disk-with-static-files-tests/test-hits/</endpoint>
* </web>
* </disks>
* <policies>

View File

@ -613,7 +613,10 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
{
bool need_complete_file_segment = file_segment->isDownloader();
if (need_complete_file_segment)
{
LOG_TEST(log, "Resetting downloader {} from scope exit", file_segment->getDownloader());
file_segment->completeBatchAndResetDownloader();
}
}
catch (...)
{

View File

@ -539,7 +539,7 @@ struct ToStartOfFifteenMinutesImpl
using FactorTransform = ZeroTransform;
};
/// Round to start of half-an-hour length interval with unspecified offset. This transform is specific for Yandex.Metrica.
/// Round to start of half-an-hour length interval with unspecified offset. This transform is specific for Metrica web analytics system.
struct TimeSlotImpl
{
static constexpr auto name = "timeSlot";

View File

@ -40,7 +40,7 @@ public:
registerFunction(name, &Function::create, case_sensitiveness);
}
/// This function is used by YQL - internal Yandex product that depends on ClickHouse by source code.
/// This function is used by YQL - innovative transactional DBMS that depends on ClickHouse by source code.
std::vector<std::string> getAllNames() const;
bool has(const std::string & name) const;

View File

@ -128,7 +128,7 @@ void validateArgumentsImpl(const IFunction & func,
}
const auto & arg = arguments[i + argument_offset];
const auto descriptor = descriptors[i];
const auto & descriptor = descriptors[i];
if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0)
throw Exception("Illegal type of argument #" + std::to_string(argument_offset + i + 1) // +1 is for human-friendly 1-based indexing
+ (descriptor.argument_name ? " '" + std::string(descriptor.argument_name) + "'" : String{})

View File

@ -10,7 +10,7 @@
#include "config_functions.h"
/** FastOps is a fast vector math library from Mikhail Parakhin (former Yandex CTO),
/** FastOps is a fast vector math library from Mikhail Parakhin, https://www.linkedin.com/in/mikhail-parakhin/
* Enabled by default.
*/
#if USE_FASTOPS

View File

@ -33,7 +33,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Functions using Yandex.Metrica dictionaries
/** Functions using deprecated dictionaries
* - dictionaries of regions, operating systems, search engines.
*
* Climb up the tree to a certain level.

View File

@ -195,7 +195,7 @@ public:
* that is the initiator of a distributed query,
* in the case when the function will be invoked for real data only at the remote servers.
* This feature is controversial and implemented specially
* for backward compatibility with the case in Yandex Banner System.
* for backward compatibility with the case in the Banner System application.
*/
if (input_rows_count == 0)
return result_type->createColumn();

View File

@ -95,7 +95,7 @@ void convertAnyColumnToBool(const IColumn * column, UInt8Container & res)
template <class Op, typename Func>
static bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func)
bool extractConstColumns(ColumnRawPtrs & in, UInt8 & res, Func && func)
{
bool has_res = false;
@ -345,7 +345,7 @@ struct OperationApplier<Op, OperationApplierImpl, 0>
template <class Op>
static ColumnPtr executeForTernaryLogicImpl(ColumnRawPtrs arguments, const DataTypePtr & result_type, size_t input_rows_count)
ColumnPtr executeForTernaryLogicImpl(ColumnRawPtrs arguments, const DataTypePtr & result_type, size_t input_rows_count)
{
/// Combine all constant columns into a single constant value.
UInt8 const_3v_value = 0;
@ -420,7 +420,7 @@ struct TypedExecutorInvoker<Op>
/// Types of all of the arguments are guaranteed to be non-nullable here
template <class Op>
static ColumnPtr basicExecuteImpl(ColumnRawPtrs arguments, size_t input_rows_count)
ColumnPtr basicExecuteImpl(ColumnRawPtrs arguments, size_t input_rows_count)
{
/// Combine all constant columns into a single constant value.
UInt8 const_val = 0;

View File

@ -12,7 +12,7 @@
/** Functions for retrieving "visit parameters".
* Visit parameters in Yandex.Metrika are a special kind of JSONs.
* Visit parameters in Metrica web analytics system are a special kind of JSONs.
* These functions are applicable to almost any JSONs.
* Implemented via templates from FunctionsStringSearch.h.
*

View File

@ -45,7 +45,7 @@ namespace DB
* Remove specified parameter from URL.
* cutURLParameter(URL, name)
*
* Get array of URL 'hierarchy' as in Yandex.Metrica tree-like reports. See docs.
* Get array of URL 'hierarchy' as in web-analytics tree-like reports. See the docs.
* URLHierarchy(URL)
*/

View File

@ -146,7 +146,7 @@ public:
void update(size_t from)
{
sink_null_map[index] = bool(src_null_map && src_null_map[from]);
sink_null_map[index] = src_null_map && src_null_map[from];
++index;
}

View File

@ -15,7 +15,7 @@
// for better debug: #include <Core/iostream_debug_helpers.h>
/** The function will enumerate distinct values of the passed multidimensional arrays looking inside at the specified depths.
* This is very unusual function made as a special order for Yandex.Metrica.
* This is very unusual function made as a special order for our dear customer - Metrica web analytics system.
*
* arrayEnumerateUniqRanked(['hello', 'world', 'hello']) = [1, 1, 2]
* - it returns similar structured array containing number of occurrence of the corresponding value.

View File

@ -718,9 +718,7 @@ private:
/**
* Catches arguments of type LowCardinality(T) (left) and U (right).
*
* The perftests
* https://clickhouse-test-reports.s3.yandex.net/12550/2d27fa0fa8c198a82bf1fe3625050ccf56695976/integration_tests_(release).html
* showed that the amount of action needed to convert the non-constant right argument to the index column
* The perftests showed that the amount of action needed to convert the non-constant right argument to the index column
* (similar to the left one's) is significantly higher than converting the array itself to an ordinary column.
*
* So, in terms of performance it's more optimal to fall back to default implementation and catch only constant

View File

@ -53,7 +53,7 @@
*
* Usage example:
*
* SELECT extractTextFromHTML(html) FROM url('https://yandex.ru/', RawBLOB, 'html String')
* SELECT extractTextFromHTML(html) FROM url('https://github.com/ClickHouse/ClickHouse', RawBLOB, 'html String')
*
* - ClickHouse has embedded web browser.
*/

View File

@ -45,7 +45,7 @@ using namespace GatherUtils;
*/
template <typename ArrayCond, typename ArrayA, typename ArrayB, typename ArrayResult, typename ResultType>
static inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const ArrayB & b, ArrayResult & res)
inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const ArrayB & b, ArrayResult & res)
{
size_t size = cond.size();
bool a_is_short = a.size() < size;
@ -77,7 +77,7 @@ static inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, co
}
template <typename ArrayCond, typename ArrayA, typename B, typename ArrayResult, typename ResultType>
static inline void fillVectorConstant(const ArrayCond & cond, const ArrayA & a, B b, ArrayResult & res)
inline void fillVectorConstant(const ArrayCond & cond, const ArrayA & a, B b, ArrayResult & res)
{
size_t size = cond.size();
bool a_is_short = a.size() < size;
@ -95,7 +95,7 @@ static inline void fillVectorConstant(const ArrayCond & cond, const ArrayA & a,
}
template <typename ArrayCond, typename A, typename ArrayB, typename ArrayResult, typename ResultType>
static inline void fillConstantVector(const ArrayCond & cond, A a, const ArrayB & b, ArrayResult & res)
inline void fillConstantVector(const ArrayCond & cond, A a, const ArrayB & b, ArrayResult & res)
{
size_t size = cond.size();
bool b_is_short = b.size() < size;

View File

@ -29,7 +29,7 @@ namespace
* For example, timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')].
* This is necessary to search for hits that are part of the corresponding visit.
*
* This is obsolete function. It was developed for Yandex.Metrica, but no longer used in Yandex.
* This is obsolete function. It was developed for Metrica web analytics system, but the art of its usage has been forgotten.
* But this function was adopted by wider audience.
*/

View File

@ -35,7 +35,6 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t /*input_rows_count*/) const override
{
auto arg_num = arguments[0];
const auto & arg = arguments[0];
if (arg.type->lowCardinality())

View File

@ -205,12 +205,8 @@ bool ParallelReadBuffer::nextImpl()
void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
{
SCOPE_EXIT({
std::lock_guard lock{mutex};
--active_working_reader;
if (active_working_reader == 0)
{
readers_done.notify_all();
}
if (active_working_reader.fetch_sub(1) == 1)
active_working_reader.notify_all();
});
try
@ -265,8 +261,12 @@ void ParallelReadBuffer::finishAndWait()
{
emergency_stop = true;
std::unique_lock lock{mutex};
readers_done.wait(lock, [&] { return active_working_reader == 0; });
size_t active_readers = active_working_reader.load();
while (active_readers != 0)
{
active_working_reader.wait(active_readers);
active_readers = active_working_reader.load();
}
}
}

View File

@ -135,9 +135,7 @@ private:
Segment current_segment;
size_t max_working_readers;
size_t active_working_reader{0};
// Triggered when all reader workers are done
std::condition_variable readers_done;
std::atomic_size_t active_working_reader{0};
CallbackRunner schedule;

View File

@ -66,7 +66,7 @@ public:
bool hasUnreadData() const;
// for streaming reading (like in Kafka) we need to restore initial state of the buffer
// w/o recreating the buffer.
// without recreating the buffer.
void reset();
private:

View File

@ -1,6 +1,4 @@
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#if USE_AZURE_BLOB_STORAGE

View File

@ -1,8 +1,6 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#if USE_AZURE_BLOB_STORAGE

Some files were not shown because too many files have changed in this diff Show More