Merge remote-tracking branch 'upstream/master'

This commit is contained in:
BayoNet 2018-05-14 08:39:27 +03:00
commit d010ec60a9
161 changed files with 3949 additions and 1001605 deletions

2
.gitignore vendored
View File

@ -9,7 +9,7 @@
# auto generated files
*.logrt
build
/build
/docs/en_single_page/
/docs/ru_single_page/
/docs/venv/

3
.gitmodules vendored
View File

@ -34,3 +34,6 @@
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/ClickHouse-Extras/boost.git
[submodule "contrib/llvm"]
path = contrib/llvm
url = https://github.com/ClickHouse-Extras/llvm

View File

@ -11,6 +11,7 @@ matrix:
#
# addons:
# apt:
# update: true
# sources:
# - ubuntu-toolchain-r-test
# packages: [ g++-7, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl ]
@ -33,6 +34,7 @@ matrix:
addons:
apt:
update: true
sources:
- ubuntu-toolchain-r-test
- llvm-toolchain-trusty-5.0
@ -77,6 +79,7 @@ matrix:
addons:
apt:
update: true
packages: [ pbuilder, fakeroot, debhelper ]
script:
@ -94,6 +97,7 @@ matrix:
#
# addons:
# apt:
# update: true
# packages: [ pbuilder, fakeroot, debhelper ]
#
# env:
@ -115,6 +119,7 @@ matrix:
#
# addons:
# apt:
# update: true
# packages: [ pbuilder, fakeroot, debhelper ]
#
# env:
@ -137,7 +142,7 @@ matrix:
# - brew link --overwrite gcc || true
#
# env:
# - MATRIX_EVAL="export CC=gcc-7 && export CXX=g++-7"
# - MATRIX_EVAL="export CC=gcc-8 && export CXX=g++-8"
#
# script:
# - env CMAKE_FLAGS="-DUSE_INTERNAL_BOOST_LIBRARY=1" utils/travis/normal.sh

View File

@ -17,7 +17,6 @@ else ()
message (WARNING "You are using an unsupported compiler! Compilation has only been tested with Clang 5+ and GCC 7+.")
endif ()
# Write compile_commands.json
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
@ -247,6 +246,7 @@ include (cmake/find_boost.cmake)
include (cmake/find_zlib.cmake)
include (cmake/find_zstd.cmake)
include (cmake/find_ltdl.cmake) # for odbc
include (cmake/find_termcap.cmake)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/contrib/poco/cmake/FindODBC.cmake)
include (${CMAKE_CURRENT_SOURCE_DIR}/contrib/poco/cmake/FindODBC.cmake) # for poco
else ()

136
ci/README.md Normal file
View File

@ -0,0 +1,136 @@
### Build and test ClickHouse on various plaforms
Quick and dirty scripts.
Usage example:
```
./run-with-docker.sh ubuntu:bionic jobs/quick-build/run.sh
```
Another example, check build on ARM 64:
```
./prepare-docker-image-ubuntu.sh
./run-with-docker.sh multiarch/ubuntu-core:arm64-bionic jobs/quick-build/run.sh
```
Look at `default_config` and `jobs/quick-build/config`
Various possible options. We are not going to automate testing all of them.
##### CPU architectures:
- x86_64;
- AArch64.
x86_64 is the main CPU architecture. We also have minimal support for AArch64.
##### Operating systems:
- Linux;
- FreeBSD.
We also target Mac OS X, but it's more difficult to test.
Linux is the main. FreeBSD is also supported as production OS.
Mac OS is intended only for development and have minimal support: client should work, server should just start.
##### Linux distributions:
For build:
- Ubuntu Bionic;
- Ubuntu Trusty.
For run:
- Ubuntu Hardy;
- CentOS 5
We should support almost any Linux to run ClickHouse. That's why we test also on old distributions.
##### How to obtain sources:
- use sources from local working copy;
- clone sources from github;
- download source tarball.
##### Compilers:
- gcc-7;
- gcc-8;
- clang-6;
- clang-svn.
##### Compiler installation:
- from OS packages;
- build from sources.
##### C++ standard library implementation:
- libc++;
- libstdc++ with C++11 ABI;
- libstdc++ with old ABI.
When building with clang, libc++ is used. When building with gcc, we choose libstdc++ with C++11 ABI.
##### Linkers:
- ldd;
- gold;
When building with clang on x86_64, ldd is used. Otherwise we use gold.
##### Build types:
- RelWithDebInfo;
- Debug;
- ASan;
- TSan.
##### Build types, extra:
- -g0 for quick build;
- enable test coverage;
- debug tcmalloc.
##### What to build:
- only `clickhouse` target;
- all targets;
- debian packages;
We also have intent to build RPM and simple tgz packages.
##### Where to get third-party libraries:
- from contrib directory (submodules);
- from OS packages.
The only production option is to use libraries from contrib directory.
Using libraries from OS packages is discouraged, but we also support this option.
##### Linkage types:
- static;
- shared;
Static linking is the only option for production usage.
We also have support for shared linking, but it is indended only for developers.
##### Make tools:
- make;
- ninja.
##### Installation options:
- run built `clickhouse` binary directly;
- install from packages.
##### How to obtain packages:
- build them;
- download from repository.
##### Sanity checks:
- check that clickhouse binary has no dependencies on unexpected shared libraries;
- check that source code have no style violations.
##### Tests:
- Functional tests;
- Integration tests;
- Unit tests;
- Simple sh/reference tests;
- Performance tests (note that they require predictable computing power);
- Tests for external dictionaries (should be moved to integration tests);
- Jepsen like tests for quorum inserts (not yet available in opensource).
##### Tests extra:
- Run functional tests with Valgrind.
##### Static analyzers:
- CppCheck;
- clang-tidy;
- Coverity.

38
ci/build-clang-from-sources.sh Executable file
View File

@ -0,0 +1,38 @@
#!/usr/bin/env bash
set -e -x
source default-config
# TODO Non debian systems
$SUDO apt-get install -y subversion
apt-cache search cmake3 | grep -P '^cmake3 ' && $SUDO apt-get -y install cmake3 || $SUDO apt-get -y install cmake
mkdir "${WORKSPACE}/llvm"
svn co "http://llvm.org/svn/llvm-project/llvm/${CLANG_SOURCES_BRANCH}" "${WORKSPACE}/llvm/llvm"
svn co "http://llvm.org/svn/llvm-project/cfe/${CLANG_SOURCES_BRANCH}" "${WORKSPACE}/llvm/llvm/tools/clang"
svn co "http://llvm.org/svn/llvm-project/lld/${CLANG_SOURCES_BRANCH}" "${WORKSPACE}/llvm/llvm/tools/lld"
svn co "http://llvm.org/svn/llvm-project/polly/${CLANG_SOURCES_BRANCH}" "${WORKSPACE}/llvm/llvm/tools/polly"
svn co "http://llvm.org/svn/llvm-project/clang-tools-extra/${CLANG_SOURCES_BRANCH}" "${WORKSPACE}/llvm/llvm/tools/clang/tools/extra"
svn co "http://llvm.org/svn/llvm-project/compiler-rt/${CLANG_SOURCES_BRANCH}" "${WORKSPACE}/llvm/llvm/projects/compiler-rt"
svn co "http://llvm.org/svn/llvm-project/libcxx/${CLANG_SOURCES_BRANCH}" "${WORKSPACE}/llvm/llvm/projects/libcxx"
svn co "http://llvm.org/svn/llvm-project/libcxxabi/${CLANG_SOURCES_BRANCH}" "${WORKSPACE}/llvm/llvm/projects/libcxxabi"
mkdir "${WORKSPACE}/llvm/build"
cd "${WORKSPACE}/llvm/build"
# NOTE You must build LLVM with the same ABI as ClickHouse.
# For example, if you compile ClickHouse with libc++, you must add
# -D LLVM_ENABLE_LIBCXX=1
# to the line below.
cmake -D CMAKE_BUILD_TYPE:STRING=Release ../llvm
make -j $THREADS
$SUDO make install
hash clang
cd ../../..
export CC=clang
export CXX=clang++

8
ci/build-debian-packages.sh Executable file
View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
set -e -x
source default-config
[[ -d "${WORKSPACE}/sources" ]] || die "Run get-sources.sh first"
./sources/release

47
ci/build-gcc-from-sources.sh Executable file
View File

@ -0,0 +1,47 @@
#!/usr/bin/env bash
set -e -x
source default-config
$SUDO apt-get install -y curl
if [[ "${GCC_SOURCES_VERSION}" == "latest" ]]; then
GCC_SOURCES_VERSION=$(curl -sSL https://ftpmirror.gnu.org/gcc/ | grep -oE 'gcc-[0-9]+(\.[0-9]+)+' | sort -Vr | head -n1)
fi
GCC_VERSION_SHORT=$(echo "$GCC_SOURCES_VERSION" | grep -oE '[0-9]' | head -n1)
echo "Will download ${GCC_SOURCES_VERSION} (short version: $GCC_VERSION_SHORT)."
THREADS=$(grep -c ^processor /proc/cpuinfo)
mkdir "${WORKSPACE}/gcc"
pushd "${WORKSPACE}/gcc"
wget https://ftpmirror.gnu.org/gcc/${GCC_SOURCES_VERSION}/${GCC_SOURCES_VERSION}.tar.xz
tar xf ${GCC_SOURCES_VERSION}.tar.xz
pushd ${GCC_SOURCES_VERSION}
./contrib/download_prerequisites
popd
mkdir gcc-build
pushd gcc-build
../${GCC_SOURCES_VERSION}/configure --enable-languages=c,c++ --disable-multilib
make -j $THREADS
$SUDO make install
popd
popd
$SUDO ln -sf /usr/local/bin/gcc /usr/local/bin/gcc-${GCC_GCC_SOURCES_VERSION_SHORT}
$SUDO ln -sf /usr/local/bin/g++ /usr/local/bin/g++-${GCC_GCC_SOURCES_VERSION_SHORT}
$SUDO ln -sf /usr/local/bin/gcc /usr/local/bin/cc
$SUDO ln -sf /usr/local/bin/g++ /usr/local/bin/c++
echo '/usr/local/lib64' | $SUDO tee /etc/ld.so.conf.d/10_local-lib64.conf
$SUDO ldconfig
hash gcc g++
gcc --version
export CC=gcc
export CXX=g++

22
ci/build-normal.sh Executable file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
set -e -x
source default-config
[[ -d "${WORKSPACE}/sources" ]] || die "Run get-sources.sh first"
mkdir -p "${WORKSPACE}/build"
pushd "${WORKSPACE}/build"
if [[ "${ENABLE_EMBEDDED_COMPILER}" == 1 ]]; then
[[ "$USE_LLVM_LIBRARIES_FROM_SYSTEM" == 0 ]] && CMAKE_FLAGS="$CMAKE_FLAGS -D USE_INTERNAL_LLVM_LIBRARY=1"
[[ "$USE_LLVM_LIBRARIES_FROM_SYSTEM" != 0 ]] && CMAKE_FLAGS="$CMAKE_FLAGS -D USE_INTERNAL_LLVM_LIBRARY=0"
fi
cmake -D CMAKE_BUILD_TYPE=${BUILD_TYPE} -D ENABLE_EMBEDDED_COMPILER=${ENABLE_EMBEDDED_COMPILER} $CMAKE_FLAGS ../sources
[[ "$BUILD_TARGETS" != 'all' ]] && BUILD_TARGETS_STRING="--target $BUILD_TARGETS"
cmake --build . $BUILD_TARGETS_STRING -- -j $THREADS
popd

7
ci/check-docker.sh Executable file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env bash
set -e -x
source default-config
command -v docker > /dev/null || die "You need to install Docker"
docker ps > /dev/null || die "You need to have access to Docker: run '$SUDO usermod -aG docker $USER' and relogin"

20
ci/check-syntax.sh Executable file
View File

@ -0,0 +1,20 @@
#!/usr/bin/env bash
set -e -x
source default-config
$SUDO apt-get install -y jq
[[ -d "${WORKSPACE}/sources" ]] || die "Run get-sources.sh first"
mkdir -p "${WORKSPACE}/build"
pushd "${WORKSPACE}/build"
cmake -D CMAKE_BUILD_TYPE=Debug $CMAKE_FLAGS ../sources
make -j $THREADS re2_st # Generated headers
jq --raw-output '.[] | .command' compile_commands.json | grep -v -P -- '-c .+/contrib/' | sed -r -e 's/-o\s+\S+/-fsyntax-only/' > syntax-commands
xargs --arg-file=syntax-commands --max-procs=$THREADS --replace /bin/sh -c "{}"
popd

10
ci/create-sources-tarball.sh Executable file
View File

@ -0,0 +1,10 @@
#!/usr/bin/env bash
set -e -x
source default-config
if [[ -d "${WORKSPACE}/sources" ]]; then
tar -c -z -f "${WORKSPACE}/sources.tar.gz" --directory "${WORKSPACE}/sources" .
else
die "Run get-sources first"
fi

65
ci/default-config Normal file
View File

@ -0,0 +1,65 @@
#!/usr/bin/env bash
set -e -x
if [[ -z "$INITIALIZED" ]]; then
INITIALIZED=1
SCRIPTPATH=$(pwd)
WORKSPACE=${SCRIPTPATH}/workspace
PROJECT_ROOT=$(cd $SCRIPTPATH/.. && pwd)
# All scripts take no arguments. All arguments must be in config.
# get-sources
SOURCES_METHOD=local # clone, local, tarball
SOURCES_CLONE_URL="https://github.com/yandex/ClickHouse.git"
SOURCES_BRANCH="master"
SOURCES_COMMIT=HEAD # do checkout of this commit after clone
# prepare-toolchain
COMPILER=gcc # gcc, clang
COMPILER_INSTALL_METHOD=packages # packages, sources
COMPILER_PACKAGE_VERSION=7 # or 6.0 for clang
# install-compiler-from-sources
CLANG_SOURCES_BRANCH=trunk # or tags/RELEASE_600/final
GCC_SOURCES_VERSION=latest # or gcc-7.1.0
# install-libraries
USE_LLVM_LIBRARIES_FROM_SYSTEM=0 # 0 or 1
ENABLE_EMBEDDED_COMPILER=1
# build
BUILD_METHOD=normal # normal, debian
BUILD_TARGETS=clickhouse # tagtet name, all; only for "normal"
BUILD_TYPE=RelWithDebInfo # RelWithDebInfo, Debug, ASan, TSan
CMAKE_FLAGS=""
# prepare-docker-image-ubuntu
DOCKER_UBUNTU_VERSION=bionic
DOCKER_UBUNTU_ARCH=arm64 # How the architecture is named in a tarball at https://partner-images.canonical.com/core/
DOCKER_UBUNTU_QUEMU_ARCH=aarch64 # How the architecture is named in QEMU
DOCKER_UBUNTU_TAG_ARCH=arm64 # How the architecture is named in Docker
DOCKER_UBUNTU_QEMU_VER=v2.9.1
DOCKER_UBUNTU_REPO=multiarch/ubuntu-core
THREADS=$(grep -c ^processor /proc/cpuinfo || nproc || sysctl -a | grep -F 'hw.ncpu')
# All scripts should return 0 in case of success, 1 in case of permanent error,
# 2 in case of temporary error, any other code in case of permanent error.
function die {
echo ${1:-Error}
exit ${2:1}
}
[[ $EUID -ne 0 ]] && SUDO=sudo
command -v apt-get && $SUDO apt-get update
# Configuration parameters may be overriden with CONFIG environment variable pointing to config file.
[[ -n "$CONFIG" ]] && source $CONFIG
mkdir -p $WORKSPACE
fi

View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2016 Multiarch
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,53 @@
Source: https://github.com/multiarch/ubuntu-core
Commit: 3972a7794b40a965615abd710759d3ed439c9a55
# :earth_africa: ubuntu-core
![](https://raw.githubusercontent.com/multiarch/dockerfile/master/logo.jpg)
Multiarch Ubuntu images for Docker.
Based on https://github.com/tianon/docker-brew-ubuntu-core/
* `multiarch/ubuntu-core` on [Docker Hub](https://hub.docker.com/r/multiarch/ubuntu-core/)
* [Available tags](https://hub.docker.com/r/multiarch/ubuntu-core/tags/)
## Usage
Once you need to configure binfmt-support on your Docker host.
This works locally or remotely (i.e using boot2docker or swarm).
```console
# configure binfmt-support on the Docker host (works locally or remotely, i.e: using boot2docker)
$ docker run --rm --privileged multiarch/qemu-user-static:register --reset
```
Then you can run an `armhf` image from your `x86_64` Docker host.
```console
$ docker run -it --rm multiarch/ubuntu-core:armhf-wily
root@a0818570f614:/# uname -a
Linux a0818570f614 4.1.13-boot2docker #1 SMP Fri Nov 20 19:05:50 UTC 2015 armv7l armv7l armv7l GNU/Linux
root@a0818570f614:/# exit
```
Or an `x86_64` image from your `x86_64` Docker host, directly, without qemu emulation.
```console
$ docker run -it --rm multiarch/ubuntu-core:amd64-wily
root@27fe384370c9:/# uname -a
Linux 27fe384370c9 4.1.13-boot2docker #1 SMP Fri Nov 20 19:05:50 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux
root@27fe384370c9:/#
```
It also works for `arm64`
```console
$ docker run -it --rm multiarch/ubuntu-core:arm64-wily
root@723fb9f184fa:/# uname -a
Linux 723fb9f184fa 4.1.13-boot2docker #1 SMP Fri Nov 20 19:05:50 UTC 2015 aarch64 aarch64 aarch64 GNU/Linux
```
## License
MIT

96
ci/docker-multiarch/update.sh Executable file
View File

@ -0,0 +1,96 @@
#!/usr/bin/env bash
set -e -x
# A POSIX variable
OPTIND=1 # Reset in case getopts has been used previously in the shell.
while getopts "a:v:q:u:d:t:" opt; do
case "$opt" in
a) ARCH=$OPTARG
;;
v) VERSION=$OPTARG
;;
q) QEMU_ARCH=$OPTARG
;;
u) QEMU_VER=$OPTARG
;;
d) DOCKER_REPO=$OPTARG
;;
t) TAG_ARCH=$OPTARG
;;
esac
done
thisTarBase="ubuntu-$VERSION-core-cloudimg-$ARCH"
thisTar="$thisTarBase-root.tar.gz"
baseUrl="https://partner-images.canonical.com/core/$VERSION"
# install qemu-user-static
if [ -n "${QEMU_ARCH}" ]; then
if [ ! -f x86_64_qemu-${QEMU_ARCH}-static.tar.gz ]; then
wget -N https://github.com/multiarch/qemu-user-static/releases/download/${QEMU_VER}/x86_64_qemu-${QEMU_ARCH}-static.tar.gz
fi
tar -xvf x86_64_qemu-${QEMU_ARCH}-static.tar.gz -C $ROOTFS/usr/bin/
fi
# get the image
if \
wget -q --spider "$baseUrl/current" \
&& wget -q --spider "$baseUrl/current/$thisTar" \
; then
baseUrl+='/current'
fi
wget -qN "$baseUrl/"{{MD5,SHA{1,256}}SUMS{,.gpg},"$thisTarBase.manifest",'unpacked/build-info.txt'} || true
wget -N "$baseUrl/$thisTar"
# check checksum
if [ -f SHA256SUMS ]; then
sha256sum="$(sha256sum "$thisTar" | cut -d' ' -f1)"
if ! grep -q "$sha256sum" SHA256SUMS; then
echo >&2 "error: '$thisTar' has invalid SHA256"
exit 1
fi
fi
cat > Dockerfile <<-EOF
FROM scratch
ADD $thisTar /
ENV ARCH=${ARCH} UBUNTU_SUITE=${VERSION} DOCKER_REPO=${DOCKER_REPO}
EOF
# add qemu-user-static binary
if [ -n "${QEMU_ARCH}" ]; then
cat >> Dockerfile <<EOF
# Add qemu-user-static binary for amd64 builders
ADD x86_64_qemu-${QEMU_ARCH}-static.tar.gz /usr/bin
EOF
fi
cat >> Dockerfile <<-EOF
# a few minor docker-specific tweaks
# see https://github.com/docker/docker/blob/master/contrib/mkimage/debootstrap
RUN echo '#!/bin/sh' > /usr/sbin/policy-rc.d \\
&& echo 'exit 101' >> /usr/sbin/policy-rc.d \\
&& chmod +x /usr/sbin/policy-rc.d \\
&& dpkg-divert --local --rename --add /sbin/initctl \\
&& cp -a /usr/sbin/policy-rc.d /sbin/initctl \\
&& sed -i 's/^exit.*/exit 0/' /sbin/initctl \\
&& echo 'force-unsafe-io' > /etc/dpkg/dpkg.cfg.d/docker-apt-speedup \\
&& echo 'DPkg::Post-Invoke { "rm -f /var/cache/apt/archives/*.deb /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true"; };' > /etc/apt/apt.conf.d/docker-clean \\
&& echo 'APT::Update::Post-Invoke { "rm -f /var/cache/apt/archives/*.deb /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true"; };' >> /etc/apt/apt.conf.d/docker-clean \\
&& echo 'Dir::Cache::pkgcache ""; Dir::Cache::srcpkgcache "";' >> /etc/apt/apt.conf.d/docker-clean \\
&& echo 'Acquire::Languages "none";' > /etc/apt/apt.conf.d/docker-no-languages \\
&& echo 'Acquire::GzipIndexes "true"; Acquire::CompressionTypes::Order:: "gz";' > /etc/apt/apt.conf.d/docker-gzip-indexes
# enable the universe
RUN sed -i 's/^#\s*\(deb.*universe\)$/\1/g' /etc/apt/sources.list
# overwrite this with 'CMD []' in a dependent Dockerfile
CMD ["/bin/bash"]
EOF
docker build -t "${DOCKER_REPO}:${TAG_ARCH}-${VERSION}" .
docker run --rm "${DOCKER_REPO}:${TAG_ARCH}-${VERSION}" /bin/bash -ec "echo Hello from Ubuntu!"

18
ci/get-sources.sh Executable file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash
set -e -x
source default-config
if [[ "$SOURCES_METHOD" == "clone" ]]; then
$SUDO apt-get install -y git
SOURCES_DIR="${WORKSPACE}/sources"
mkdir -p "${SOURCES_DIR}"
git clone --recursive --branch "$SOURCES_BRANCH" "$SOURCES_CLONE_URL" "${SOURCES_DIR}"
pushd "${SOURCES_DIR}"
git checkout "$SOURCES_COMMIT"
popd
elif [[ "$SOURCES_METHOD" == "local" ]]; then
ln -f -s "${PROJECT_ROOT}" "${WORKSPACE}/sources"
else
die "Unknown SOURCES_METHOD"
fi

View File

@ -0,0 +1,29 @@
#!/usr/bin/env bash
set -e -x
source default-config
# TODO Non debian systems
# TODO Install from PPA on older Ubuntu
if [ -f '/etc/lsb-release' ]; then
source /etc/lsb-release
if [[ "$DISTRIB_ID" == "Ubuntu" ]]; then
if [[ "$COMPILER" == "gcc" ]]; then
$SUDO apt-get -y install gcc-${COMPILER_PACKAGE_VERSION} g++-${COMPILER_PACKAGE_VERSION}
export CC=gcc-${COMPILER_PACKAGE_VERSION}
export CXX=g++-${COMPILER_PACKAGE_VERSION}
elif [[ "$COMPILER" == "clang" ]]; then
[[ $(uname -m) == "x86_64" ]] && LLD="lld-${COMPILER_PACKAGE_VERSION}"
$SUDO apt-get -y install clang-${COMPILER_PACKAGE_VERSION} "$LLD" libc++-dev libc++abi-dev
export CC=clang-${COMPILER_PACKAGE_VERSION}
export CXX=clang++-${COMPILER_PACKAGE_VERSION}
else
die "Unknown compiler specified"
fi
else
die "Unknown Linux variant"
fi
else
die "Unknown OS"
fi

View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -e -x
source default-config
if [[ "$COMPILER" == "gcc" ]]; then
. build-gcc-from-sources.sh
elif [[ "$COMPILER" == "clang" ]]; then
. build-clang-from-sources.sh
else
die "Unknown COMPILER"
fi

12
ci/install-libraries.sh Executable file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -e -x
source default-config
# TODO Non-debian systems
$SUDO apt-get -y install libssl-dev libicu-dev libreadline-dev libmysqlclient-dev unixodbc-dev
if [[ "$ENABLE_EMBEDDED_COMPILER" == 1 && "$USE_LLVM_LIBRARIES_FROM_SYSTEM" == 1 ]]; then
$SUDO apt-get -y install liblld-5.0-dev libclang-5.0-dev
fi

View File

@ -0,0 +1,12 @@
SOURCES_METHOD=local
COMPILER=clang
COMPILER_INSTALL_METHOD=packages
COMPILER_PACKAGE_VERSION=6.0
USE_LLVM_LIBRARIES_FROM_SYSTEM=0
BUILD_METHOD=normal
BUILD_TARGETS=clickhouse
BUILD_TYPE=Debug
ENABLE_EMBEDDED_COMPILER=0
CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_TCMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0"
# TODO it doesn't build with -D ENABLE_NETSSL=0 -D ENABLE_MONGODB=0 -D ENABLE_MYSQL=0 -D ENABLE_DATA_ODBC=0

18
ci/jobs/quick-build/run.sh Executable file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash
set -e -x
# How to run:
# From "ci" directory:
# jobs/quick-build/run.sh
# or:
# ./run-with-docker.sh ubuntu:bionic jobs/quick-build/run.sh
CONFIG="$(dirname $0)"/config
cd "$(dirname $0)"/../..
. default-config
. get-sources.sh
. prepare-toolchain.sh
. install-libraries.sh
. build-normal.sh

View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
set -e -x
source default-config
./check-docker.sh
# http://fl47l1n3.net/2015/12/24/binfmt/
$SUDO apt-get -y install qemu-user-static
pushd docker-multiarch
$SUDO ./update.sh \
-a "$DOCKER_UBUNTU_ARCH" \
-v "$DOCKER_UBUNTU_VERSION" \
-q "$DOCKER_UBUNTU_QUEMU_ARCH" \
-u "$DOCKER_UBUNTU_QEMU_VER" \
-d "$DOCKER_UBUNTU_REPO" \
-t "$DOCKER_UBUNTU_TAG_ARCH"
docker run --rm --privileged multiarch/qemu-user-static:register
popd

15
ci/prepare-toolchain.sh Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
set -e -x
source default-config
# TODO Non debian systems
apt-cache search cmake3 | grep -P '^cmake3 ' && $SUDO apt-get -y install cmake3 || $SUDO apt-get -y install cmake
if [[ "$COMPILER_INSTALL_METHOD" == "packages" ]]; then
. install-compiler-from-packages.sh;
elif [[ "$COMPILER_INSTALL_METHOD" == "sources" ]]; then
. install-compiler-from-sources.sh
else
die "Unknown COMPILER_INSTALL_METHOD"
fi

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
set -e -x
source default-config
$SUDO apt-get -y install vagrant virtualbox
pushd "vagrant-freebsd"
vagrant up
vagrant ssh-config > vagrant-ssh
ssh -F vagrant-ssh default 'uname -a'
scp -F vagrant-ssh -r ../../ci default:~
popd

View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash
set -e -x
# Usage example:
# ./run-with-docker.sh centos:centos6 ./run-clickhouse-from-binaries.sh
source default-config
SERVER_BIN="${WORKSPACE}/build/dbms/src/Server/clickhouse"
SERVER_CONF="${WORKSPACE}/sources/dbms/src/Server/config.xml"
SERVER_DATADIR="${WORKSPACE}/clickhouse"
[[ -x "$SERVER_BIN" ]] || die "Run build-normal.sh first"
[[ -r "$SERVER_CONF" ]] || die "Run get-sources.sh first"
mkdir -p "${SERVER_DATADIR}"
$SERVER_BIN server --config-file "$SERVER_CONF" --pid-file="${WORKSPACE}/clickhouse.pid" -- --path "$SERVER_DATADIR"

6
ci/run-with-docker.sh Executable file
View File

@ -0,0 +1,6 @@
#!/usr/bin/env bash
set -e -x
PROJECT_ROOT="$(cd "$(dirname "$0")/.."; pwd -P)"
[[ -n "$CONFIG" ]] && DOCKER_ENV="--env=CONFIG"
docker run -t --network=host --mount=type=bind,source=${PROJECT_ROOT},destination=/ClickHouse --workdir=/ClickHouse/ci $DOCKER_ENV "$1" "$2"

1
ci/vagrant-freebsd/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.vagrant

3
ci/vagrant-freebsd/Vagrantfile vendored Normal file
View File

@ -0,0 +1,3 @@
Vagrant.configure("2") do |config|
config.vm.box = "generic/freebsd11"
end

View File

@ -22,6 +22,10 @@ if (NOT MSVC)
set (NOT_MSVC 1)
endif ()
if (NOT APPLE)
set (NOT_APPLE 1)
endif ()
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (COMPILER_GCC 1)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")

View File

@ -1,107 +1,57 @@
option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" 1)
option (USE_INTERNAL_LLVM_LIBRARY "Use bundled or system LLVM library. Default: system library for quicker developer builds." 0)
if (ENABLE_EMBEDDED_COMPILER)
# Based on source code of YT.
# Authors: Ivan Puzyrevskiy, Alexey Lukyanchikov, Ruslan Savchenko.
if (USE_INTERNAL_LLVM_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/CMakeLists.txt")
message (WARNING "submodule contrib/llvm is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_INTERNAL_LLVM_LIBRARY 0)
endif ()
# Find LLVM includes and libraries.
#
# LLVM_VERSION - LLVM version.
# LLVM_INCLUDE_DIRS - Directory containing LLVM headers.
# LLVM_LIBRARY_DIRS - Directory containing LLVM libraries.
# LLVM_CXXFLAGS - C++ compiler flags for files that include LLVM headers.
# LLVM_FOUND - True if LLVM was found.
if (NOT USE_INTERNAL_LLVM_LIBRARY)
set (LLVM_PATHS "/usr/local/lib/llvm")
# llvm_map_components_to_libraries - Maps LLVM used components to required libraries.
# Usage: llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES core jit interpreter native ...)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(LLVM_VERSION_POSTFIX "${COMPILER_POSTFIX}" CACHE STRING "")
if (LLVM_VERSION)
find_package(LLVM ${LLVM_VERSION} CONFIG PATHS ${LLVM_PATHS})
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
find_package(LLVM ${CMAKE_CXX_COMPILER_VERSION} CONFIG PATHS ${LLVM_PATHS})
else ()
if (ARCH_FREEBSD)
set(LLVM_VERSION_POSTFIX "50" CACHE STRING "")
else()
set(LLVM_VERSION_POSTFIX "-5.0" CACHE STRING "")
find_package (LLVM 6 CONFIG PATHS ${LLVM_PATHS})
if (NOT LLVM_FOUND)
find_package (LLVM 5 CONFIG PATHS ${LLVM_PATHS})
endif ()
if (NOT LLVM_FOUND)
find_package (LLVM 7 CONFIG PATHS ${LLVM_PATHS})
endif ()
endif ()
find_program(LLVM_CONFIG_EXECUTABLE
NAMES llvm-config${LLVM_VERSION_POSTFIX} llvm-config llvm-config-devel
PATHS $ENV{LLVM_ROOT}/bin)
if (LLVM_FOUND)
# Remove dynamically-linked zlib and libedit from LLVM's dependencies:
set_target_properties(LLVMSupport PROPERTIES INTERFACE_LINK_LIBRARIES "-lpthread;LLVMDemangle")
set_target_properties(LLVMLineEditor PROPERTIES INTERFACE_LINK_LIBRARIES "LLVMSupport")
mark_as_advanced(LLVM_CONFIG_EXECUTABLE)
if(NOT LLVM_CONFIG_EXECUTABLE)
message(WARNING "Cannot find LLVM (looking for `llvm-config${LLVM_VERSION_POSTFIX}`, `llvm-config`, `llvm-config-devel`). Please, provide LLVM_ROOT environment variable.")
else()
set(LLVM_FOUND TRUE)
execute_process(
COMMAND ${LLVM_CONFIG_EXECUTABLE} --version
OUTPUT_VARIABLE LLVM_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(LLVM_VERSION VERSION_LESS "5")
message(FATAL_ERROR "LLVM 5+ is required. You have ${LLVM_VERSION} (${LLVM_CONFIG_EXECUTABLE})")
endif()
message(STATUS "LLVM config: ${LLVM_CONFIG_EXECUTABLE}; version: ${LLVM_VERSION}")
execute_process(
COMMAND ${LLVM_CONFIG_EXECUTABLE} --includedir
OUTPUT_VARIABLE LLVM_INCLUDE_DIRS
OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(
COMMAND ${LLVM_CONFIG_EXECUTABLE} --libdir
OUTPUT_VARIABLE LLVM_LIBRARY_DIRS
OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(
COMMAND ${LLVM_CONFIG_EXECUTABLE} --cxxflags
OUTPUT_VARIABLE LLVM_CXXFLAGS
OUTPUT_STRIP_TRAILING_WHITESPACE)
execute_process(
COMMAND ${LLVM_CONFIG_EXECUTABLE} --targets-built
OUTPUT_VARIABLE LLVM_TARGETS_BUILT
OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REPLACE " " ";" LLVM_TARGETS_BUILT "${LLVM_TARGETS_BUILT}")
if (USE_STATIC_LIBRARIES)
set (LLVM_CONFIG_ADD "--link-static")
endif()
# Get the link libs we need.
function(llvm_map_components_to_libraries RESULT)
execute_process(
COMMAND ${LLVM_CONFIG_EXECUTABLE} ${LLVM_CONFIG_ADD} --libs ${ARGN}
OUTPUT_VARIABLE _tmp
OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REPLACE " " ";" _libs_module "${_tmp}")
#message(STATUS "LLVM Libraries for '${ARGN}': ${_libs_module}")
execute_process(
COMMAND ${LLVM_CONFIG_EXECUTABLE} --system-libs ${ARGN}
OUTPUT_VARIABLE _libs_system
OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REPLACE "\n" " " _libs_system "${_libs_system}")
string(REPLACE " " " " _libs_system "${_libs_system}")
string(REPLACE " " ";" _libs_system "${_libs_system}")
set(${RESULT} ${_libs_module} ${_libs_system} PARENT_SCOPE)
endfunction(llvm_map_components_to_libraries)
message(STATUS "LLVM Include Directory: ${LLVM_INCLUDE_DIRS}")
message(STATUS "LLVM Library Directory: ${LLVM_LIBRARY_DIRS}")
message(STATUS "LLVM C++ Compiler: ${LLVM_CXXFLAGS}")
endif()
if (LLVM_FOUND AND LLVM_INCLUDE_DIRS AND LLVM_LIBRARY_DIRS)
option(LLVM_HAS_RTTI "Enable if LLVM was build with RTTI enabled" ON)
set (USE_EMBEDDED_COMPILER 1)
endif()
else()
set (LLVM_FOUND 1)
set (USE_EMBEDDED_COMPILER 1)
set (LLVM_VERSION "7.0.0bundled")
set (LLVM_INCLUDE_DIRS
${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/include
${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/include
${ClickHouse_SOURCE_DIR}/contrib/llvm/clang/include
${ClickHouse_BINARY_DIR}/contrib/llvm/clang/include
${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/tools/clang/include
${ClickHouse_SOURCE_DIR}/contrib/llvm/lld/include
${ClickHouse_BINARY_DIR}/contrib/llvm/lld/include
${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/tools/lld/include)
set (LLVM_LIBRARY_DIRS ${ClickHouse_BINARY_DIR}/contrib/llvm/llvm)
endif()
if (LLVM_FOUND)
message(STATUS "LLVM version: ${LLVM_PACKAGE_VERSION}")
message(STATUS "LLVM include Directory: ${LLVM_INCLUDE_DIRS}")
message(STATUS "LLVM library Directory: ${LLVM_LIBRARY_DIRS}")
message(STATUS "LLVM C++ compiler flags: ${LLVM_CXXFLAGS}")
endif()
endif()

View File

@ -1,15 +1,10 @@
if (APPLE)
# lib from libs/libcommon
set (RT_LIBRARY "apple_rt")
else ()
elseif (ARCH_FREEBSD)
find_library (RT_LIBRARY rt)
else ()
set (RT_LIBRARY "")
endif ()
message(STATUS "Using rt: ${RT_LIBRARY}")
function (target_link_rt_by_force TARGET)
if (NOT APPLE)
set (FLAGS "-Wl,-no-as-needed -lrt -Wl,-as-needed")
set_property (TARGET ${TARGET} APPEND PROPERTY LINK_FLAGS "${FLAGS}")
endif ()
endfunction ()

5
cmake/find_termcap.cmake Normal file
View File

@ -0,0 +1,5 @@
find_library (TERMCAP_LIBRARY termcap)
if (NOT TERMCAP_LIBRARY)
find_library (TERMCAP_LIBRARY tinfo)
endif()
message (STATUS "Using termcap: ${TERMCAP_LIBRARY}")

View File

@ -21,7 +21,7 @@ if (USE_INTERNAL_RE2_LIBRARY)
endif ()
if (USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
set (BUILD_TESTING ${ENABLE_TESTS} CACHE INTERNAL "")
set (BUILD_TESTING 0 CACHE INTERNAL "")
add_subdirectory (double-conversion)
endif ()
@ -113,11 +113,7 @@ if (USE_INTERNAL_RDKAFKA_LIBRARY)
endif ()
if (USE_INTERNAL_CAPNP_LIBRARY)
if (APPLE) # tests never end
set (BUILD_TESTING 0 CACHE INTERNAL "")
else ()
set (BUILD_TESTING ${ENABLE_TESTS} CACHE INTERNAL "")
endif ()
set (_save ${CMAKE_CXX_EXTENSIONS})
set (CMAKE_CXX_EXTENSIONS)
add_subdirectory (capnproto/c++)
@ -150,3 +146,7 @@ if (USE_INTERNAL_POCO_LIBRARY)
target_include_directories(Crypto PUBLIC ${OPENSSL_INCLUDE_DIR})
endif ()
endif ()
if (USE_INTERNAL_LLVM_LIBRARY)
add_subdirectory (llvm/llvm)
endif ()

View File

@ -1088,7 +1088,7 @@ class sparsegroup {
// This is equivalent to memmove(), but faster on my Intel P4,
// at least with gcc4.1 -O2 / glibc 2.3.6.
for (size_type i = settings.num_buckets; i > offset; --i)
memcpy(group + i, group + i-1, sizeof(*group));
memcpy(static_cast<void*>(group + i), group + i-1, sizeof(*group));
}
// Create space at group[offset], without special assumptions about value_type
@ -1154,7 +1154,7 @@ class sparsegroup {
// at lesat with gcc4.1 -O2 / glibc 2.3.6.
assert(settings.num_buckets > 0);
for (size_type i = offset; i < settings.num_buckets-1; ++i)
memcpy(group + i, group + i+1, sizeof(*group)); // hopefully inlined!
memcpy(static_cast<void*>(group + i), group + i+1, sizeof(*group)); // hopefully inlined!
group = settings.realloc_or_die(group, settings.num_buckets-1);
}

View File

@ -1,4 +1,3 @@
message (STATUS "Building: tcmalloc_minimal_internal")
add_library (tcmalloc_minimal_internal

1
contrib/llvm vendored Submodule

@ -0,0 +1 @@
Subproject commit 163def217817c90fb982a6daf384744d8472b92b

2
contrib/poco vendored

@ -1 +1 @@
Subproject commit 2d5a158303adf9d47b980cdcfdb26cee1460704e
Subproject commit 3a2d0a833a22ef5e1164a9ada54e3253cb038904

View File

@ -99,6 +99,15 @@ else ()
install (TARGETS dbms LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
endif ()
if (USE_EMBEDDED_COMPILER)
llvm_map_components_to_libnames(REQUIRED_LLVM_LIBRARIES all)
if (TERMCAP_LIBRARY)
list(APPEND REQUIRED_LLVM_LIBRARIES ${TERMCAP_LIBRARY})
endif()
target_link_libraries (dbms ${REQUIRED_LLVM_LIBRARIES})
target_include_directories (dbms BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
endif ()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.

View File

@ -0,0 +1,34 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionWindowFunnel.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
namespace DB
{
namespace
{
AggregateFunctionPtr createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes & arguments, const Array & params)
{
if (params.size() != 1)
throw Exception{"Aggregate function " + name + " requires exactly one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
if (arguments.size() < 2)
throw Exception("Aggregate function " + name + " requires one timestamp argument and at least one event condition.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (arguments.size() > AggregateFunctionWindowFunnelData::max_events + 1)
throw Exception("Too many event arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return std::make_shared<AggregateFunctionWindowFunnel>(arguments, params);
}
}
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory & factory)
{
factory.registerFunction("windowFunnel", createAggregateFunctionWindowFunnel, AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,260 @@
#pragma once
#include <iostream>
#include <sstream>
#include <unordered_set>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/ArenaAllocator.h>
#include <Common/typeid_cast.h>
#include <ext/range.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
}
struct ComparePairFirst final
{
template <typename T1, typename T2>
bool operator()(const std::pair<T1, T2> & lhs, const std::pair<T1, T2> & rhs) const
{
return lhs.first < rhs.first;
}
};
struct AggregateFunctionWindowFunnelData
{
static constexpr auto max_events = 32;
using TimestampEvent = std::pair<UInt32, UInt8>;
static constexpr size_t bytes_on_stack = 64;
using TimestampEvents = PODArray<TimestampEvent, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
using Comparator = ComparePairFirst;
bool sorted = true;
TimestampEvents events_list;
size_t size() const
{
return events_list.size();
}
void add(UInt32 timestamp, UInt8 event)
{
// Since most events should have already been sorted by timestamp.
if (sorted && events_list.size() > 0 && events_list.back().first > timestamp)
sorted = false;
events_list.emplace_back(timestamp, event);
}
void merge(const AggregateFunctionWindowFunnelData & other)
{
const auto size = events_list.size();
events_list.insert(std::begin(other.events_list), std::end(other.events_list));
/// either sort whole container or do so partially merging ranges afterwards
if (!sorted && !other.sorted)
std::sort(std::begin(events_list), std::end(events_list), Comparator{});
else
{
const auto begin = std::begin(events_list);
const auto middle = std::next(begin, size);
const auto end = std::end(events_list);
if (!sorted)
std::sort(begin, middle, Comparator{});
if (!other.sorted)
std::sort(middle, end, Comparator{});
std::inplace_merge(begin, middle, end, Comparator{});
}
sorted = true;
}
void sort()
{
if (!sorted)
{
std::sort(std::begin(events_list), std::end(events_list), Comparator{});
sorted = true;
}
}
void serialize(WriteBuffer & buf) const
{
writeBinary(sorted, buf);
writeBinary(events_list.size(), buf);
for (const auto & events : events_list)
{
writeBinary(events.first, buf);
writeBinary(events.second, buf);
}
}
void deserialize(ReadBuffer & buf)
{
readBinary(sorted, buf);
size_t size;
readBinary(size, buf);
/// TODO Protection against huge size
events_list.clear();
events_list.resize(size);
UInt32 timestamp;
UInt8 event;
for (size_t i = 0; i < size; ++i)
{
readBinary(timestamp, buf);
readBinary(event, buf);
events_list.emplace_back(timestamp, event);
}
}
};
/** Calculates the max event level in a sliding window.
* The max size of events is 32, that's enough for funnel analytics
*
* Usage:
* - windowFunnel(window)(timestamp, cond1, cond2, cond3, ....)
*/
class AggregateFunctionWindowFunnel final
: public IAggregateFunctionDataHelper<AggregateFunctionWindowFunnelData, AggregateFunctionWindowFunnel>
{
private:
UInt32 window;
UInt8 events_size;
// Loop through the entire events_list, update the event timestamp value
// The level path must be 1---2---3---...---check_events_size, find the max event level that statisfied the path in the sliding window.
// If found, returns the max event level, else return 0.
// The Algorithm complexity is O(n).
UInt8 getEventLevel(const AggregateFunctionWindowFunnelData & data) const
{
if (data.size() == 0)
return 0;
if (events_size == 1)
return 1;
const_cast<AggregateFunctionWindowFunnelData &>(data).sort();
// events_timestamp stores the timestamp that latest i-th level event happen withing time window after previous level event.
// timestamp defaults to -1, which unsigned timestamp value never meet
std::vector<Int32> events_timestamp(events_size, -1);
for (const auto & pair : data.events_list)
{
const auto & timestamp = pair.first;
const auto & event_idx = pair.second - 1;
if (event_idx == 0)
events_timestamp[0] = timestamp;
else if (events_timestamp[event_idx - 1] >= 0 && timestamp <= events_timestamp[event_idx - 1] + window)
{
events_timestamp[event_idx] = events_timestamp[event_idx - 1];
if (event_idx + 1 == events_size)
return events_size;
}
}
for (size_t event = events_timestamp.size(); event > 0; --event)
{
if (events_timestamp[event - 1] >= 0)
return event;
}
return 0;
}
public:
String getName() const override
{
return "windowFunnel";
}
AggregateFunctionWindowFunnel(const DataTypes & arguments, const Array & params)
{
const auto time_arg = arguments.front().get();
if (!typeid_cast<const DataTypeDateTime *>(time_arg) && !typeid_cast<const DataTypeUInt32 *>(time_arg))
throw Exception{"Illegal type " + time_arg->getName() + " of first argument of aggregate function " + getName()
+ ", must be DateTime or UInt32"};
for (const auto i : ext::range(1, arguments.size()))
{
auto cond_arg = arguments[i].get();
if (!typeid_cast<const DataTypeUInt8 *>(cond_arg))
throw Exception{"Illegal type " + cond_arg->getName() + " of argument " + toString(i + 1) + " of aggregate function "
+ getName() + ", must be UInt8",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
events_size = arguments.size() - 1;
window = params.at(0).safeGet<UInt64>();
}
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeUInt8>();
}
void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
{
UInt8 event_level = 0;
for (const auto i : ext::range(1, events_size + 1))
{
auto event = static_cast<const ColumnVector<UInt8> *>(columns[i])->getData()[row_num];
if (event)
{
event_level = i;
break;
}
}
if (event_level)
{
this->data(place).add(static_cast<const ColumnVector<UInt32> *>(columns[0])->getData()[row_num], event_level);
}
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).serialize(buf);
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
{
this->data(place).deserialize(buf);
}
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
{
static_cast<ColumnUInt8 &>(to).getData().push_back(getEventLevel(this->data(place)));
}
const char * getHeaderFilePath() const override
{
return __FILE__;
}
};
}

View File

@ -14,6 +14,7 @@ void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
void registerAggregateFunctionsMinMaxAny(AggregateFunctionFactory &);
void registerAggregateFunctionsStatisticsStable(AggregateFunctionFactory &);
void registerAggregateFunctionsStatisticsSimple(AggregateFunctionFactory &);
@ -45,6 +46,7 @@ void registerAggregateFunctions()
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionsQuantile(factory);
registerAggregateFunctionsSequenceMatch(factory);
registerAggregateFunctionWindowFunnel(factory);
registerAggregateFunctionsMinMaxAny(factory);
registerAggregateFunctionsStatisticsStable(factory);
registerAggregateFunctionsStatisticsSimple(factory);

View File

@ -17,6 +17,7 @@
#include <Common/Exception.h>
#include <Common/NetException.h>
#include <Common/CurrentMetrics.h>
#include <Common/DNSResolver.h>
#include <Interpreters/ClientInfo.h>
#include <Common/config.h>
@ -66,7 +67,10 @@ void Connection::connect()
{
socket = std::make_unique<Poco::Net::StreamSocket>();
}
socket->connect(resolved_address, timeouts.connection_timeout);
current_resolved_address = DNSResolver::instance().resolveAddress(host, port);
socket->connect(current_resolved_address, timeouts.connection_timeout);
socket->setReceiveTimeout(timeouts.receive_timeout);
socket->setSendTimeout(timeouts.send_timeout);
socket->setNoDelay(true);
@ -462,6 +466,14 @@ void Connection::sendExternalTablesData(ExternalTablesData & data)
LOG_DEBUG(log_wrapper.get(), msg.rdbuf());
}
Poco::Net::SocketAddress Connection::getResolvedAddress() const
{
if (connected)
return current_resolved_address;
return DNSResolver::instance().resolveAddress(host, port);
}
bool Connection::poll(size_t timeout_microseconds)
{
@ -571,6 +583,7 @@ void Connection::initBlockInput()
void Connection::setDescription()
{
auto resolved_address = getResolvedAddress();
description = host + ":" + toString(resolved_address.port());
auto ip_address = resolved_address.host().toString();
@ -610,7 +623,7 @@ void Connection::fillBlockExtraInfo(BlockExtraInfo & info) const
{
info.is_valid = true;
info.host = host;
info.resolved_address = resolved_address.toString();
info.resolved_address = getResolvedAddress().toString();
info.port = port;
info.user = user;
}

View File

@ -53,32 +53,7 @@ class Connection : private boost::noncopyable
friend class MultiplexedConnections;
public:
Connection(const String & host_, UInt16 port_, const String & default_database_,
const String & user_, const String & password_,
const ConnectionTimeouts & timeouts_,
const String & client_name_ = "client",
Protocol::Compression compression_ = Protocol::Compression::Enable,
Protocol::Secure secure_ = Protocol::Secure::Disable,
Poco::Timespan sync_request_timeout_ = Poco::Timespan(DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC, 0))
:
host(host_), port(port_), default_database(default_database_),
user(user_), password(password_), resolved_address(host, port),
client_name(client_name_),
compression(compression_),
secure(secure_),
timeouts(timeouts_),
sync_request_timeout(sync_request_timeout_),
log_wrapper(*this)
{
/// Don't connect immediately, only on first need.
if (user.empty())
user = "default";
setDescription();
}
Connection(const String & host_, UInt16 port_, const Poco::Net::SocketAddress & resolved_address_,
Connection(const String & host_, UInt16 port_,
const String & default_database_,
const String & user_, const String & password_,
const ConnectionTimeouts & timeouts_,
@ -87,10 +62,8 @@ public:
Protocol::Secure secure_ = Protocol::Secure::Disable,
Poco::Timespan sync_request_timeout_ = Poco::Timespan(DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC, 0))
:
host(host_), port(port_),
default_database(default_database_),
user(user_), password(password_),
resolved_address(resolved_address_),
host(host_), port(port_), default_database(default_database_),
user(user_), password(password_), current_resolved_address(host, port),
client_name(client_name_),
compression(compression_),
secure(secure_),
@ -189,6 +162,9 @@ public:
size_t outBytesCount() const { return out ? out->count() : 0; }
size_t inBytesCount() const { return in ? in->count() : 0; }
/// Returns initially resolved address
Poco::Net::SocketAddress getResolvedAddress() const;
private:
String host;
UInt16 port;
@ -196,10 +172,9 @@ private:
String user;
String password;
/** Address could be resolved beforehand and passed to constructor. Then 'host' and 'port' fields are used just for logging.
* Otherwise address is resolved in constructor. Thus, DNS based load balancing is not supported.
*/
Poco::Net::SocketAddress resolved_address;
/// Address is resolved during the first connection (or the following reconnects)
/// Use it only for logging purposes
Poco::Net::SocketAddress current_resolved_address;
/// For messages in log and in exceptions.
String description;

View File

@ -54,24 +54,7 @@ public:
Protocol::Secure secure_ = Protocol::Secure::Disable)
: Base(max_connections_, &Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")),
host(host_), port(port_), default_database(default_database_),
user(user_), password(password_), resolved_address(host_, port_),
client_name(client_name_), compression(compression_),
secure{secure_},
timeouts(timeouts)
{
}
ConnectionPool(unsigned max_connections_,
const String & host_, UInt16 port_, const Poco::Net::SocketAddress & resolved_address_,
const String & default_database_,
const String & user_, const String & password_,
const ConnectionTimeouts & timeouts,
const String & client_name_ = "client",
Protocol::Compression compression_ = Protocol::Compression::Enable,
Protocol::Secure secure_ = Protocol::Secure::Disable)
: Base(max_connections_, &Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")),
host(host_), port(port_), default_database(default_database_),
user(user_), password(password_), resolved_address(resolved_address_),
user(user_), password(password_),
client_name(client_name_), compression(compression_),
secure{secure_},
timeouts(timeouts)
@ -102,7 +85,7 @@ protected:
ConnectionPtr allocObject() override
{
return std::make_shared<Connection>(
host, port, resolved_address,
host, port,
default_database, user, password, timeouts,
client_name, compression, secure);
}
@ -114,11 +97,6 @@ private:
String user;
String password;
/** The address can be resolved in advance and passed to the constructor. Then `host` and `port` fields are meaningful only for logging.
* Otherwise, address is resolved in constructor. That is, DNS balancing is not supported.
*/
Poco::Net::SocketAddress resolved_address;
String client_name;
Protocol::Compression compression; /// Whether to compress data when interacting with the server.
Protocol::Secure secure; /// Whether to encrypt data when interacting with the server.

View File

@ -193,6 +193,7 @@ public:
bool isFixedAndContiguous() const override { return data->isFixedAndContiguous(); }
bool valuesHaveFixedSize() const override { return data->valuesHaveFixedSize(); }
size_t sizeOfValueIfFixed() const override { return data->sizeOfValueIfFixed(); }
StringRef getRawData() const override { return data->getRawData(); }
/// Not part of the common interface.

View File

@ -129,7 +129,7 @@ public:
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return n; }
StringRef getRawData() const override { return StringRef(chars.data(), chars.size()); }
/// Specialized part of interface, not from IColumn.

View File

@ -268,7 +268,7 @@ public:
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
StringRef getRawData() const override { return StringRef(reinterpret_cast<const char*>(data.data()), data.size()); }
/** More efficient methods of manipulation - to manipulate with data directly. */
Container & getData()

View File

@ -308,6 +308,9 @@ public:
/// Values in column are represented as continuous memory segment of fixed size. Implies valuesHaveFixedSize.
virtual bool isFixedAndContiguous() const { return false; }
/// If isFixedAndContiguous, returns the underlying data array, otherwise throws an exception.
virtual StringRef getRawData() const { throw Exception("Column " + getName() + " is not a contiguous block of memory", ErrorCodes::NOT_IMPLEMENTED); }
/// If valuesHaveFixedSize, returns size of value, otherwise throw an exception.
virtual size_t sizeOfValueIfFixed() const { throw Exception("Values of column " + getName() + " are not fixed size.", ErrorCodes::CANNOT_GET_SIZE_OF_FIELD); }

View File

@ -15,6 +15,7 @@
#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
/** Small wrappers for asynchronous I/O.

View File

@ -1,40 +0,0 @@
#pragma once
#include <Poco/Net/IPAddress.h>
#include <Poco/Net/SocketAddress.h>
#include <memory>
#include <ext/singleton.h>
namespace DB
{
/// A singleton implementing global and permanent DNS cache
/// It could be updated only manually via drop() method
class DNSCache : public ext::singleton<DNSCache>
{
public:
DNSCache(const DNSCache &) = delete;
/// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolve its IP
Poco::Net::IPAddress resolveHost(const std::string & host);
/// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolve its IP and port
Poco::Net::SocketAddress resolveHostAndPort(const std::string & host_and_port);
/// Drops all caches
void drop();
~DNSCache();
protected:
DNSCache();
friend class ext::singleton<DNSCache>;
struct Impl;
std::unique_ptr<Impl> impl;
};
}

View File

@ -1,4 +1,4 @@
#include "DNSCache.h"
#include "DNSResolver.h"
#include <Common/SimpleCache.h>
#include <Common/Exception.h>
#include <Core/Types.h>
@ -6,6 +6,7 @@
#include <Poco/Net/NetException.h>
#include <Poco/NumberParser.h>
#include <arpa/inet.h>
#include <atomic>
namespace DB
@ -74,34 +75,47 @@ static Poco::Net::IPAddress resolveIPAddressImpl(const std::string & host)
}
struct DNSCache::Impl
struct DNSResolver::Impl
{
SimpleCache<decltype(resolveIPAddressImpl), &resolveIPAddressImpl> cache_host;
/// If disabled, will not make cache lookups, will resolve addresses manually on each call
std::atomic<bool> disable_cache{false};
};
DNSCache::DNSCache() : impl(std::make_unique<DNSCache::Impl>()) {}
DNSResolver::DNSResolver() : impl(std::make_unique<DNSResolver::Impl>()) {}
Poco::Net::IPAddress DNSCache::resolveHost(const std::string & host)
Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host)
{
return impl->cache_host(host);
return !impl->disable_cache ? impl->cache_host(host) : resolveIPAddressImpl(host);
}
Poco::Net::SocketAddress DNSCache::resolveHostAndPort(const std::string & host_and_port)
Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port)
{
String host;
UInt16 port;
splitHostAndPort(host_and_port, host, port);
return Poco::Net::SocketAddress(impl->cache_host(host), port);
return !impl->disable_cache ? Poco::Net::SocketAddress(impl->cache_host(host), port) : Poco::Net::SocketAddress(host_and_port);
}
void DNSCache::drop()
Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port)
{
return !impl->disable_cache ? Poco::Net::SocketAddress(impl->cache_host(host), port) : Poco::Net::SocketAddress(host, port);
}
void DNSResolver::dropCache()
{
impl->cache_host.drop();
}
DNSCache::~DNSCache() = default;
void DNSResolver::setDisableCacheFlag(bool is_disabled)
{
impl->disable_cache = is_disabled;
}
DNSResolver::~DNSResolver() = default;
}

View File

@ -0,0 +1,46 @@
#pragma once
#include <Poco/Net/IPAddress.h>
#include <Poco/Net/SocketAddress.h>
#include <memory>
#include <ext/singleton.h>
#include <Core/Types.h>
namespace DB
{
/// A singleton implementing DNS names resolving with optional permanent DNS cache
/// The cache could be updated only manually via drop() method
class DNSResolver : public ext::singleton<DNSResolver>
{
public:
DNSResolver(const DNSResolver &) = delete;
/// Accepts host names like 'example.com' or '127.0.0.1' or '::1' and resolve its IP
Poco::Net::IPAddress resolveHost(const std::string & host);
/// Accepts host names like 'example.com:port' or '127.0.0.1:port' or '[::1]:port' and resolve its IP and port
Poco::Net::SocketAddress resolveAddress(const std::string & host_and_port);
Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port);
/// Disables caching
void setDisableCacheFlag(bool is_disabled = true);
/// Drops all caches
void dropCache();
~DNSResolver();
protected:
DNSResolver();
friend class ext::singleton<DNSResolver>;
struct Impl;
std::unique_ptr<Impl> impl;
};
}

View File

@ -1,4 +1,3 @@
#include <errno.h>
#include <string.h>
#include <cxxabi.h>

View File

@ -408,7 +408,7 @@ protected:
/// Copy to a new location and zero the old one.
x.setHash(hash_value);
memcpy(&buf[place_value], &x, sizeof(x));
memcpy(static_cast<void*>(&buf[place_value]), &x, sizeof(x));
x.setZero();
/// Then the elements that previously were in collision with this can move to the old place.
@ -726,7 +726,7 @@ public:
{
size_t place_value = findEmptyCell(grower.place(hash_value));
memcpy(&buf[place_value], cell, sizeof(*cell));
memcpy(static_cast<void*>(&buf[place_value]), cell, sizeof(*cell));
++m_size;
if (unlikely(grower.overflow(m_size)))
@ -897,7 +897,7 @@ public:
this->clearHasZero();
m_size = 0;
memset(buf, 0, grower.bufSize() * sizeof(*buf));
memset(static_cast<void*>(buf), 0, grower.bufSize() * sizeof(*buf));
}
/// After executing this function, the table can only be destroyed,

View File

@ -79,6 +79,8 @@
M(CompileAttempt) \
M(CompileSuccess) \
\
M(CompileFunction) \
\
M(ExternalSortWritePart) \
M(ExternalSortMerge) \
M(ExternalAggregationWritePart) \
@ -138,7 +140,9 @@
M(RWLockAcquiredReadLocks) \
M(RWLockAcquiredWriteLocks) \
M(RWLockReadersWaitMilliseconds) \
M(RWLockWritersWaitMilliseconds)
M(RWLockWritersWaitMilliseconds) \
\
M(NetworkErrors)
namespace ProfileEvents
{

View File

@ -86,8 +86,8 @@ public:
if (*needle < 0x80u)
{
first_needle_symbol_is_ascii = true;
l = static_cast<const UInt8>(std::tolower(*needle));
u = static_cast<const UInt8>(std::toupper(*needle));
l = std::tolower(*needle);
u = std::toupper(*needle);
}
else
{

View File

@ -9,6 +9,7 @@
#cmakedefine01 USE_RDKAFKA
#cmakedefine01 USE_CAPNP
#cmakedefine01 USE_EMBEDDED_COMPILER
#cmakedefine01 LLVM_HAS_RTTI
#cmakedefine01 Poco_SQLODBC_FOUND
#cmakedefine01 Poco_DataODBC_FOUND
#cmakedefine01 Poco_MongoDB_FOUND

View File

@ -10,7 +10,7 @@
namespace DB
{
bool isLocalAddress(const Poco::Net::SocketAddress & address)
bool isLocalAddress(const Poco::Net::IPAddress & address)
{
static auto interfaces = Poco::Net::NetworkInterface::list();
@ -21,14 +21,14 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address)
* Theoretically, this may not be correct - depends on `route` setting
* - through which interface we will actually access the specified address.
*/
return interface.address().length() == address.host().length()
&& 0 == memcmp(interface.address().addr(), address.host().addr(), address.host().length());
return interface.address().length() == address.length()
&& 0 == memcmp(interface.address().addr(), address.addr(), address.length());
});
}
bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port)
{
return clickhouse_port == address.port() && isLocalAddress(address);
return clickhouse_port == address.port() && isLocalAddress(address.host());
}

View File

@ -1,6 +1,8 @@
#pragma once
#include <common/Types.h>
#include <Poco/Net/IPAddress.h>
namespace Poco
{
@ -24,6 +26,8 @@ namespace DB
bool isLocalAddress(const Poco::Net::SocketAddress & address);
bool isLocalAddress(const Poco::Net::IPAddress & address);
/// Returns number of different bytes in hostnames, used for load balancing
size_t getHostNameDifference(const std::string & local_hostname, const std::string & host);
}

View File

@ -5,6 +5,7 @@
#include <Poco/File.h>
#include <Common/Exception.h>
#include <port/unistd.h>
#include <errno.h>
namespace DB

View File

@ -361,10 +361,19 @@ private:
switch (field.which)
{
case Types::Null: f(field.template get<Null>()); return;
// gcc 7.3.0
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
case Types::UInt64: f(field.template get<UInt64>()); return;
case Types::UInt128: f(field.template get<UInt128>()); return;
case Types::Int64: f(field.template get<Int64>()); return;
case Types::Float64: f(field.template get<Float64>()); return;
#if !__clang__
#pragma GCC diagnostic pop
#endif
case Types::String: f(field.template get<String>()); return;
case Types::Array: f(field.template get<Array>()); return;
case Types::Tuple: f(field.template get<Tuple>()); return;

View File

@ -150,7 +150,7 @@ private:
--ptr->refcount;
if (!ptr->refcount)
{
if (std::uncaught_exception())
if (std::uncaught_exceptions())
delete ptr;
else
ptr->output_blocks->push_back(ptr);

View File

@ -108,6 +108,7 @@ Block MergeSortingBlockInputStream::readImpl()
*/
if (max_bytes_before_external_sort && sum_bytes_in_blocks > max_bytes_before_external_sort)
{
Poco::File(tmp_path).createDirectories();
temporary_files.emplace_back(new Poco::TemporaryFile(tmp_path));
const std::string & path = temporary_files.back()->path();
WriteBufferFromFile file_buf(path);

127
dbms/src/DataTypes/Native.h Normal file
View File

@ -0,0 +1,127 @@
#pragma once
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeInterval.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeUUID.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <llvm/IR/IRBuilder.h>
#pragma GCC diagnostic pop
namespace DB
{
template <typename... Ts>
static inline bool typeIsEither(const IDataType & type)
{
return (typeid_cast<const Ts *>(&type) || ...);
}
static inline bool typeIsSigned(const IDataType & type)
{
return typeIsEither<
DataTypeInt8, DataTypeInt16, DataTypeInt32, DataTypeInt64,
DataTypeFloat32, DataTypeFloat64,
DataTypeDate, DataTypeDateTime, DataTypeInterval
>(type);
}
static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDataType & type)
{
if (auto * nullable = typeid_cast<const DataTypeNullable *>(&type))
{
auto * wrapped = toNativeType(builder, *nullable->getNestedType());
return wrapped ? llvm::StructType::get(wrapped, /* is null = */ builder.getInt1Ty()) : nullptr;
}
/// LLVM doesn't have unsigned types, it has unsigned instructions.
if (typeIsEither<DataTypeInt8, DataTypeUInt8>(type))
return builder.getInt8Ty();
if (typeIsEither<DataTypeInt16, DataTypeUInt16, DataTypeDate>(type))
return builder.getInt16Ty();
if (typeIsEither<DataTypeInt32, DataTypeUInt32, DataTypeDateTime>(type))
return builder.getInt32Ty();
if (typeIsEither<DataTypeInt64, DataTypeUInt64, DataTypeInterval>(type))
return builder.getInt64Ty();
if (typeIsEither<DataTypeUUID>(type))
return builder.getInt128Ty();
if (typeIsEither<DataTypeFloat32>(type))
return builder.getFloatTy();
if (typeIsEither<DataTypeFloat64>(type))
return builder.getDoubleTy();
if (auto * fixed_string = typeid_cast<const DataTypeFixedString *>(&type))
return llvm::VectorType::get(builder.getInt8Ty(), fixed_string->getN());
return nullptr;
}
static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type)
{
return toNativeType(builder, *type);
}
static inline llvm::Value * nativeBoolCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value)
{
if (from->isNullable())
{
auto * inner = nativeBoolCast(b, removeNullable(from), b.CreateExtractValue(value, {0}));
return b.CreateAnd(b.CreateNot(b.CreateExtractValue(value, {1})), inner);
}
auto * zero = llvm::Constant::getNullValue(value->getType());
if (value->getType()->isIntegerTy())
return b.CreateICmpNE(value, zero);
if (value->getType()->isFloatingPointTy())
return b.CreateFCmpONE(value, zero); /// QNaN is false
throw Exception("Cannot cast non-number " + from->getName() + " to bool", ErrorCodes::NOT_IMPLEMENTED);
}
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value, llvm::Type * to)
{
auto * n_from = value->getType();
if (n_from == to)
return value;
if (n_from->isIntegerTy() && to->isFloatingPointTy())
return typeIsSigned(*from) ? b.CreateSIToFP(value, to) : b.CreateUIToFP(value, to);
if (n_from->isFloatingPointTy() && to->isIntegerTy())
return typeIsSigned(*from) ? b.CreateFPToSI(value, to) : b.CreateFPToUI(value, to);
if (n_from->isIntegerTy() && to->isIntegerTy())
return b.CreateIntCast(value, to, typeIsSigned(*from));
if (n_from->isFloatingPointTy() && to->isFloatingPointTy())
return b.CreateFPCast(value, to);
throw Exception("Cannot cast " + from->getName() + " to requested type", ErrorCodes::NOT_IMPLEMENTED);
}
static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr & from, llvm::Value * value, const DataTypePtr & to)
{
auto * n_to = toNativeType(b, to);
if (value->getType() == n_to)
return value;
if (from->isNullable() && to->isNullable())
{
auto * inner = nativeCast(b, removeNullable(from), b.CreateExtractValue(value, {0}), to);
return b.CreateInsertValue(inner, b.CreateExtractValue(value, {1}), {1});
}
if (from->isNullable())
return nativeCast(b, removeNullable(from), b.CreateExtractValue(value, {0}), to);
if (to->isNullable())
{
auto * inner = nativeCast(b, from, value, removeNullable(to));
return b.CreateInsertValue(llvm::Constant::getNullValue(n_to), inner, {0});
}
return nativeCast(b, from, value, n_to);
}
}
#endif

View File

@ -109,3 +109,7 @@ endif ()
if (ENABLE_TESTS)
add_subdirectory (tests)
endif ()
if (USE_EMBEDDED_COMPILER)
target_include_directories (clickhouse_functions BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
endif ()

View File

@ -5,12 +5,16 @@
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/WriteHelpers.h>
#include "FunctionsArithmetic.h"
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column)
{
if (!column->isColumnConst())

View File

@ -4,6 +4,7 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeInterval.h>
#include <DataTypes/Native.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnConst.h>
#include <Functions/IFunction.h>
@ -17,7 +18,14 @@
#include <Interpreters/ExpressionActions.h>
#include <ext/range.h>
#include <common/intExp.h>
#include <boost/math/common_factor.hpp>
#include <boost/integer/common_factor.hpp>
#if USE_EMBEDDED_COMPILER
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <llvm/IR/IRBuilder.h>
#pragma GCC diagnostic pop
#endif
namespace DB
@ -106,6 +114,15 @@ struct PlusImpl
/// Next everywhere, static_cast - so that there is no wrong result in expressions of the form Int64 c = UInt32(a) * Int32(-1).
return static_cast<Result>(a) + b;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
return left->getType()->isIntegerTy() ? b.CreateAdd(left, right) : b.CreateFAdd(left, right);
}
#endif
};
@ -119,6 +136,15 @@ struct MultiplyImpl
{
return static_cast<Result>(a) * b;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
return left->getType()->isIntegerTy() ? b.CreateMul(left, right) : b.CreateFMul(left, right);
}
#endif
};
template <typename A, typename B>
@ -131,6 +157,15 @@ struct MinusImpl
{
return static_cast<Result>(a) - b;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
return left->getType()->isIntegerTy() ? b.CreateSub(left, right) : b.CreateFSub(left, right);
}
#endif
};
template <typename A, typename B>
@ -143,6 +178,17 @@ struct DivideFloatingImpl
{
return static_cast<Result>(a) / b;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
if (left->getType()->isIntegerTy())
throw Exception("DivideFloatingImpl expected a floating-point type", ErrorCodes::LOGICAL_ERROR);
return b.CreateFDiv(left, right);
}
#endif
};
@ -189,6 +235,10 @@ struct DivideIntegralImpl
throwIfDivisionLeadsToFPE(a, b);
return a / b;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// don't know how to throw from LLVM IR
#endif
};
template <typename A, typename B>
@ -201,6 +251,10 @@ struct DivideIntegralOrZeroImpl
{
return unlikely(divisionLeadsToFPE(a, b)) ? 0 : a / b;
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// TODO implement the checks
#endif
};
template <typename A, typename B>
@ -212,9 +266,12 @@ struct ModuloImpl
static inline Result apply(A a, B b)
{
throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));
return typename NumberTraits::ToInteger<A>::Type(a)
% typename NumberTraits::ToInteger<B>::Type(b);
return typename NumberTraits::ToInteger<A>::Type(a) % typename NumberTraits::ToInteger<B>::Type(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// don't know how to throw from LLVM IR
#endif
};
template <typename A, typename B>
@ -225,9 +282,19 @@ struct BitAndImpl
template <typename Result = ResultType>
static inline Result apply(A a, B b)
{
return static_cast<Result>(a)
& static_cast<Result>(b);
return static_cast<Result>(a) & static_cast<Result>(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
if (!left->getType()->isIntegerTy())
throw Exception("BitAndImpl expected an integral type", ErrorCodes::LOGICAL_ERROR);
return b.CreateAnd(left, right);
}
#endif
};
template <typename A, typename B>
@ -238,9 +305,19 @@ struct BitOrImpl
template <typename Result = ResultType>
static inline Result apply(A a, B b)
{
return static_cast<Result>(a)
| static_cast<Result>(b);
return static_cast<Result>(a) | static_cast<Result>(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
if (!left->getType()->isIntegerTy())
throw Exception("BitOrImpl expected an integral type", ErrorCodes::LOGICAL_ERROR);
return b.CreateOr(left, right);
}
#endif
};
template <typename A, typename B>
@ -251,9 +328,19 @@ struct BitXorImpl
template <typename Result = ResultType>
static inline Result apply(A a, B b)
{
return static_cast<Result>(a)
^ static_cast<Result>(b);
return static_cast<Result>(a) ^ static_cast<Result>(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
if (!left->getType()->isIntegerTy())
throw Exception("BitXorImpl expected an integral type", ErrorCodes::LOGICAL_ERROR);
return b.CreateXor(left, right);
}
#endif
};
template <typename A, typename B>
@ -264,9 +351,19 @@ struct BitShiftLeftImpl
template <typename Result = ResultType>
static inline Result apply(A a, B b)
{
return static_cast<Result>(a)
<< static_cast<Result>(b);
return static_cast<Result>(a) << static_cast<Result>(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
if (!left->getType()->isIntegerTy())
throw Exception("BitShiftLeftImpl expected an integral type", ErrorCodes::LOGICAL_ERROR);
return b.CreateShl(left, right);
}
#endif
};
template <typename A, typename B>
@ -277,9 +374,19 @@ struct BitShiftRightImpl
template <typename Result = ResultType>
static inline Result apply(A a, B b)
{
return static_cast<Result>(a)
>> static_cast<Result>(b);
return static_cast<Result>(a) >> static_cast<Result>(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed)
{
if (!left->getType()->isIntegerTy())
throw Exception("BitShiftRightImpl expected an integral type", ErrorCodes::LOGICAL_ERROR);
return is_signed ? b.CreateAShr(left, right) : b.CreateLShr(left, right);
}
#endif
};
template <typename A, typename B>
@ -293,6 +400,19 @@ struct BitRotateLeftImpl
return (static_cast<Result>(a) << static_cast<Result>(b))
| (static_cast<Result>(a) >> ((sizeof(Result) * 8) - static_cast<Result>(b)));
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
if (!left->getType()->isIntegerTy())
throw Exception("BitRotateLeftImpl expected an integral type", ErrorCodes::LOGICAL_ERROR);
auto * size = llvm::ConstantInt::get(left->getType(), left->getType()->getPrimitiveSizeInBits());
/// XXX how is this supposed to behave in signed mode?
return b.CreateOr(b.CreateShl(left, right), b.CreateLShr(left, b.CreateSub(size, right)));
}
#endif
};
template <typename A, typename B>
@ -306,24 +426,35 @@ struct BitRotateRightImpl
return (static_cast<Result>(a) >> static_cast<Result>(b))
| (static_cast<Result>(a) << ((sizeof(Result) * 8) - static_cast<Result>(b)));
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool)
{
if (!left->getType()->isIntegerTy())
throw Exception("BitRotateRightImpl expected an integral type", ErrorCodes::LOGICAL_ERROR);
auto * size = llvm::ConstantInt::get(left->getType(), left->getType()->getPrimitiveSizeInBits());
return b.CreateOr(b.CreateLShr(left, right), b.CreateShl(left, b.CreateSub(size, right)));
}
#endif
};
template <typename T>
std::enable_if_t<std::is_integral_v<T>, T> toInteger(T x) { return x; }
template <typename T>
std::enable_if_t<std::is_floating_point_v<T>, Int64> toInteger(T x) { return Int64(x); }
template <typename A, typename B>
struct BitTestImpl
{
using ResultType = UInt8;
template <typename Result = ResultType>
static inline Result apply(A a, B b) { return (toInteger(a) >> toInteger(b)) & 1; };
static inline Result apply(A a, B b)
{
return (typename NumberTraits::ToInteger<A>::Type(a) >> typename NumberTraits::ToInteger<B>::Type(b)) & 1;
};
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// TODO
#endif
};
template <typename A, typename B>
struct LeastBaseImpl
@ -336,6 +467,18 @@ struct LeastBaseImpl
/** gcc 4.9.2 successfully vectorizes a loop from this function. */
return static_cast<Result>(a) < static_cast<Result>(b) ? static_cast<Result>(a) : static_cast<Result>(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed)
{
if (!left->getType()->isIntegerTy())
/// XXX minnum is basically fmin(), it may or may not match whatever apply() does
return b.CreateMinNum(left, right);
return b.CreateSelect(is_signed ? b.CreateICmpSLT(left, right) : b.CreateICmpULT(left, right), left, right);
}
#endif
};
template <typename A, typename B>
@ -349,6 +492,10 @@ struct LeastSpecialImpl
static_assert(std::is_same_v<Result, ResultType>, "ResultType != Result");
return accurate::lessOp(a, b) ? static_cast<Result>(a) : static_cast<Result>(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// ???
#endif
};
template <typename A, typename B>
@ -365,6 +512,19 @@ struct GreatestBaseImpl
{
return static_cast<Result>(a) > static_cast<Result>(b) ? static_cast<Result>(a) : static_cast<Result>(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed)
{
if (!left->getType()->isIntegerTy())
/// XXX maxnum is basically fmax(), it may or may not match whatever apply() does
/// XXX CreateMaxNum is broken on LLVM 5.0 and 6.0 (generates minnum instead; fixed in 7)
return b.CreateBinaryIntrinsic(llvm::Intrinsic::maxnum, left, right);
return b.CreateSelect(is_signed ? b.CreateICmpSGT(left, right) : b.CreateICmpUGT(left, right), left, right);
}
#endif
};
template <typename A, typename B>
@ -378,6 +538,10 @@ struct GreatestSpecialImpl
static_assert(std::is_same_v<Result, ResultType>, "ResultType != Result");
return accurate::greaterOp(a, b) ? static_cast<Result>(a) : static_cast<Result>(b);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// ???
#endif
};
template <typename A, typename B>
@ -393,6 +557,15 @@ struct NegateImpl
{
return -static_cast<ResultType>(a);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool)
{
return arg->getType()->isIntegerTy() ? b.CreateNeg(arg) : b.CreateFNeg(arg);
}
#endif
};
template <typename A>
@ -404,6 +577,17 @@ struct BitNotImpl
{
return ~static_cast<ResultType>(a);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool)
{
if (!arg->getType()->isIntegerTy())
throw Exception("BitNotImpl expected an integral type", ErrorCodes::LOGICAL_ERROR);
return b.CreateNot(arg);
}
#endif
};
template <typename A>
@ -420,6 +604,10 @@ struct AbsImpl
else if constexpr (std::is_floating_point_v<A>)
return static_cast<ResultType>(std::abs(a));
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// special type handling, some other time
#endif
};
template <typename A, typename B>
@ -432,10 +620,14 @@ struct GCDImpl
{
throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));
throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<B>::Type(b), typename NumberTraits::ToInteger<A>::Type(a));
return boost::math::gcd(
return boost::integer::gcd(
typename NumberTraits::ToInteger<Result>::Type(a),
typename NumberTraits::ToInteger<Result>::Type(b));
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// exceptions (and a non-trivial algorithm)
#endif
};
template <typename A, typename B>
@ -448,10 +640,14 @@ struct LCMImpl
{
throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));
throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<B>::Type(b), typename NumberTraits::ToInteger<A>::Type(a));
return boost::math::lcm(
return boost::integer::lcm(
typename NumberTraits::ToInteger<Result>::Type(a),
typename NumberTraits::ToInteger<Result>::Type(b));
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// exceptions (and a non-trivial algorithm)
#endif
};
template <typename A>
@ -463,6 +659,17 @@ struct IntExp2Impl
{
return intExp2(a);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool)
{
if (!arg->getType()->isIntegerTy())
throw Exception("IntExp2Impl expected an integral type", ErrorCodes::LOGICAL_ERROR);
return b.CreateShl(llvm::ConstantInt::get(arg->getType(), 1), arg);
}
#endif
};
template <typename A>
@ -474,6 +681,10 @@ struct IntExp10Impl
{
return intExp10(a);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false; /// library function
#endif
};
/// Used to indicate undefined operation
@ -745,6 +956,43 @@ public:
if (!valid)
throw Exception(getName() + "'s arguments do not match the expected data types", ErrorCodes::LOGICAL_ERROR);
}
#if USE_EMBEDDED_COMPILER
bool isCompilableImpl(const DataTypes & arguments) const override
{
return castBothTypes(arguments[0].get(), arguments[1].get(), [&](const auto & left, const auto & right)
{
using LeftDataType = std::decay_t<decltype(left)>;
using RightDataType = std::decay_t<decltype(right)>;
using ResultDataType = typename BinaryOperationTraits<Op, LeftDataType, RightDataType>::ResultDataType;
using OpSpec = Op<typename LeftDataType::FieldType, typename RightDataType::FieldType>;
return !std::is_same_v<ResultDataType, InvalidType> && OpSpec::compilable;
});
}
llvm::Value * compileImpl(llvm::IRBuilderBase & builder, const DataTypes & types, ValuePlaceholders values) const override
{
llvm::Value * result = nullptr;
castBothTypes(types[0].get(), types[1].get(), [&](const auto & left, const auto & right)
{
using LeftDataType = std::decay_t<decltype(left)>;
using RightDataType = std::decay_t<decltype(right)>;
using ResultDataType = typename BinaryOperationTraits<Op, LeftDataType, RightDataType>::ResultDataType;
using OpSpec = Op<typename LeftDataType::FieldType, typename RightDataType::FieldType>;
if constexpr (!std::is_same_v<ResultDataType, InvalidType> && OpSpec::compilable)
{
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
auto type = std::make_shared<ResultDataType>();
auto * lval = nativeCast(b, types[0], values[0](), type);
auto * rval = nativeCast(b, types[1], values[1](), type);
result = OpSpec::compile(b, lval, rval, std::is_signed_v<typename ResultDataType::FieldType>);
return true;
}
return false;
});
return result;
}
#endif
};
@ -821,6 +1069,35 @@ public:
throw Exception(getName() + "'s argument does not match the expected data type", ErrorCodes::LOGICAL_ERROR);
}
#if USE_EMBEDDED_COMPILER
bool isCompilableImpl(const DataTypes & arguments) const override
{
return castType(arguments[0].get(), [&](const auto & type)
{
return Op<typename std::decay_t<decltype(type)>::FieldType>::compilable;
});
}
llvm::Value * compileImpl(llvm::IRBuilderBase & builder, const DataTypes & types, ValuePlaceholders values) const override
{
llvm::Value * result = nullptr;
castType(types[0].get(), [&](const auto & type)
{
using T0 = typename std::decay_t<decltype(type)>::FieldType;
using T1 = typename Op<T0>::ResultType;
if constexpr (Op<T0>::compilable)
{
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * v = nativeCast(b, types[0], values[0](), std::make_shared<DataTypeNumber<T1>>());
result = Op<T0>::compile(b, v, std::is_signed_v<T1>);
return true;
}
return false;
});
return result;
}
#endif
bool hasInformationAboutMonotonicity() const override
{
return FunctionUnaryArithmeticMonotonicity<Name>::has();

View File

@ -53,12 +53,26 @@ template <typename A, typename B> struct EqualsOp
using SymmetricOp = EqualsOp<B, A>;
static UInt8 apply(A a, B b) { return accurate::equalsOp(a, b); }
#if USE_EMBEDDED_COMPILER
static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * x, llvm::Value * y, bool /*is_signed*/)
{
return x->getType()->isIntegerTy() ? b.CreateICmpEQ(x, y) : b.CreateFCmpOEQ(x, y); /// qNaNs always compare false
}
#endif
};
template <typename A, typename B> struct NotEqualsOp
{
using SymmetricOp = NotEqualsOp<B, A>;
static UInt8 apply(A a, B b) { return accurate::notEqualsOp(a, b); }
#if USE_EMBEDDED_COMPILER
static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * x, llvm::Value * y, bool /*is_signed*/)
{
return x->getType()->isIntegerTy() ? b.CreateICmpNE(x, y) : b.CreateFCmpONE(x, y);
}
#endif
};
template <typename A, typename B> struct GreaterOp;
@ -67,12 +81,26 @@ template <typename A, typename B> struct LessOp
{
using SymmetricOp = GreaterOp<B, A>;
static UInt8 apply(A a, B b) { return accurate::lessOp(a, b); }
#if USE_EMBEDDED_COMPILER
static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * x, llvm::Value * y, bool is_signed)
{
return x->getType()->isIntegerTy() ? (is_signed ? b.CreateICmpSLT(x, y) : b.CreateICmpULT(x, y)) : b.CreateFCmpOLT(x, y);
}
#endif
};
template <typename A, typename B> struct GreaterOp
{
using SymmetricOp = LessOp<B, A>;
static UInt8 apply(A a, B b) { return accurate::greaterOp(a, b); }
#if USE_EMBEDDED_COMPILER
static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * x, llvm::Value * y, bool is_signed)
{
return x->getType()->isIntegerTy() ? (is_signed ? b.CreateICmpSGT(x, y) : b.CreateICmpUGT(x, y)) : b.CreateFCmpOGT(x, y);
}
#endif
};
template <typename A, typename B> struct GreaterOrEqualsOp;
@ -81,12 +109,26 @@ template <typename A, typename B> struct LessOrEqualsOp
{
using SymmetricOp = GreaterOrEqualsOp<B, A>;
static UInt8 apply(A a, B b) { return accurate::lessOrEqualsOp(a, b); }
#if USE_EMBEDDED_COMPILER
static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * x, llvm::Value * y, bool is_signed)
{
return x->getType()->isIntegerTy() ? (is_signed ? b.CreateICmpSLE(x, y) : b.CreateICmpULE(x, y)) : b.CreateFCmpOLE(x, y);
}
#endif
};
template <typename A, typename B> struct GreaterOrEqualsOp
{
using SymmetricOp = LessOrEqualsOp<B, A>;
static UInt8 apply(A a, B b) { return accurate::greaterOrEqualsOp(a, b); }
#if USE_EMBEDDED_COMPILER
static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * x, llvm::Value * y, bool is_signed)
{
return x->getType()->isIntegerTy() ? (is_signed ? b.CreateICmpSGE(x, y) : b.CreateICmpUGE(x, y)) : b.CreateFCmpOGE(x, y);
}
#endif
};
@ -1136,6 +1178,41 @@ public:
col_with_type_and_name_left.type, col_with_type_and_name_right.type,
left_is_num, input_rows_count);
}
#if USE_EMBEDDED_COMPILER
bool isCompilableImpl(const DataTypes & types) const override
{
auto isBigInteger = &typeIsEither<DataTypeInt64, DataTypeUInt64, DataTypeUUID>;
auto isFloatingPoint = &typeIsEither<DataTypeFloat32, DataTypeFloat64>;
if ((isBigInteger(*types[0]) && isFloatingPoint(*types[1])) || (isBigInteger(*types[1]) && isFloatingPoint(*types[0])))
return false; /// TODO: implement (double, int_N where N > double's mantissa width)
return types[0]->isValueRepresentedByNumber() && types[1]->isValueRepresentedByNumber();
}
llvm::Value * compileImpl(llvm::IRBuilderBase & builder, const DataTypes & types, ValuePlaceholders values) const override
{
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * x = values[0]();
auto * y = values[1]();
if (!types[0]->equals(*types[1]))
{
llvm::Type * common;
if (x->getType()->isIntegerTy() && y->getType()->isIntegerTy())
common = b.getIntNTy(std::max(
/// if one integer has a sign bit, make sure the other does as well. llvm generates optimal code
/// (e.g. uses overflow flag on x86) for (word size + 1)-bit integer operations.
x->getType()->getIntegerBitWidth() + (!typeIsSigned(*types[0]) && typeIsSigned(*types[1])),
y->getType()->getIntegerBitWidth() + (!typeIsSigned(*types[1]) && typeIsSigned(*types[0]))));
else
/// (double, float) or (double, int_N where N <= double's mantissa width) -> double
common = b.getDoubleTy();
x = nativeCast(b, types[0], x, common);
y = nativeCast(b, types[1], y, common);
}
auto * result = Op<int, int>::compile(b, x, y, typeIsSigned(*types[0]) || typeIsSigned(*types[1]));
return b.CreateSelect(result, b.getInt8(1), b.getInt8(0));
}
#endif
};

View File

@ -6,6 +6,7 @@
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/Native.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnConst.h>
@ -113,7 +114,65 @@ public:
};
class FunctionIf : public IFunction
template <bool null_is_false>
class FunctionIfBase : public IFunction
{
#if USE_EMBEDDED_COMPILER
public:
bool isCompilableImpl(const DataTypes & types) const override
{
for (const auto & type : types)
if (!removeNullable(type)->isValueRepresentedByNumber())
return false;
return true;
}
llvm::Value * compileImpl(llvm::IRBuilderBase & builder, const DataTypes & types, ValuePlaceholders values) const override
{
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
auto type = getReturnTypeImpl(types);
llvm::Value * null = nullptr;
if (!null_is_false && type->isNullable())
null = b.CreateInsertValue(llvm::Constant::getNullValue(toNativeType(b, type)), b.getTrue(), {1});
auto * head = b.GetInsertBlock();
auto * join = llvm::BasicBlock::Create(head->getContext(), "", head->getParent());
std::vector<std::pair<llvm::BasicBlock *, llvm::Value *>> returns;
for (size_t i = 0; i + 1 < types.size(); i += 2)
{
auto * then = llvm::BasicBlock::Create(head->getContext(), "", head->getParent());
auto * next = llvm::BasicBlock::Create(head->getContext(), "", head->getParent());
auto * cond = values[i]();
if (!null_is_false && types[i]->isNullable())
{
auto * nonnull = llvm::BasicBlock::Create(head->getContext(), "", head->getParent());
returns.emplace_back(b.GetInsertBlock(), null);
b.CreateCondBr(b.CreateExtractValue(cond, {1}), join, nonnull);
b.SetInsertPoint(nonnull);
b.CreateCondBr(nativeBoolCast(b, removeNullable(types[i]), b.CreateExtractValue(cond, {0})), then, next);
}
else
{
b.CreateCondBr(nativeBoolCast(b, types[i], cond), then, next);
}
b.SetInsertPoint(then);
auto * value = nativeCast(b, types[i + 1], values[i + 1](), type);
returns.emplace_back(b.GetInsertBlock(), value);
b.CreateBr(join);
b.SetInsertPoint(next);
}
auto * value = nativeCast(b, types.back(), values.back()(), type);
returns.emplace_back(b.GetInsertBlock(), value);
b.CreateBr(join);
b.SetInsertPoint(join);
auto * phi = b.CreatePHI(toNativeType(b, type), returns.size());
for (const auto & r : returns)
phi->addIncoming(r.second, r.first);
return phi;
}
#endif
};
class FunctionIf : public FunctionIfBase</*null_is_false=*/false>
{
public:
static constexpr auto name = "if";
@ -916,8 +975,8 @@ public:
/// - arrays of such types.
///
/// Additionally the arguments, conditions or branches, support nullable types
/// and the NULL value.
class FunctionMultiIf final : public IFunction
/// and the NULL value, with a NULL condition treated as false.
class FunctionMultiIf final : public FunctionIfBase</*null_is_false=*/true>
{
public:
static constexpr auto name = "multiIf";

View File

@ -21,7 +21,7 @@ void throwExceptionForIncompletelyParsedValue(
message_buf << " at begin of string";
if (to_type.isNumber())
message_buf << ". Note: there are to" << to_type.getName() << "OrZero function, which returns zero instead of throwing exception.";
message_buf << ". Note: there are to" << to_type.getName() << "OrZero and to" << to_type.getName() << "OrNull functions, which returns zero/NULL instead of throwing exception.";
throw Exception(message_buf.str(), ErrorCodes::CANNOT_PARSE_TEXT);
}

View File

@ -12,6 +12,13 @@
#include <Functions/FunctionHelpers.h>
#include <type_traits>
#if USE_EMBEDDED_COMPILER
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <llvm/IR/IRBuilder.h>
#pragma GCC diagnostic pop
#endif
namespace DB
{
@ -31,65 +38,71 @@ namespace ErrorCodes
* For example, 1 OR NULL returns 1, not NULL.
*/
struct AndImpl
{
static inline bool isSaturable()
static inline constexpr bool isSaturable()
{
return true;
}
static inline bool isSaturatedValue(bool a)
static inline constexpr bool isSaturatedValue(bool a)
{
return !a;
}
static inline bool apply(bool a, bool b)
static inline constexpr bool apply(bool a, bool b)
{
return a && b;
}
static inline bool specialImplementationForNulls() { return false; }
static inline constexpr bool specialImplementationForNulls() { return false; }
};
struct OrImpl
{
static inline bool isSaturable()
static inline constexpr bool isSaturable()
{
return true;
}
static inline bool isSaturatedValue(bool a)
static inline constexpr bool isSaturatedValue(bool a)
{
return a;
}
static inline bool apply(bool a, bool b)
static inline constexpr bool apply(bool a, bool b)
{
return a || b;
}
static inline bool specialImplementationForNulls() { return true; }
static inline constexpr bool specialImplementationForNulls() { return true; }
};
struct XorImpl
{
static inline bool isSaturable()
static inline constexpr bool isSaturable()
{
return false;
}
static inline bool isSaturatedValue(bool)
static inline constexpr bool isSaturatedValue(bool)
{
return false;
}
static inline bool apply(bool a, bool b)
static inline constexpr bool apply(bool a, bool b)
{
return a != b;
}
static inline bool specialImplementationForNulls() { return false; }
static inline constexpr bool specialImplementationForNulls() { return false; }
#if USE_EMBEDDED_COMPILER
static inline llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a, llvm::Value * b)
{
return builder.CreateXor(a, b);
}
#endif
};
template <typename A>
@ -101,6 +114,13 @@ struct NotImpl
{
return !a;
}
#if USE_EMBEDDED_COMPILER
static inline llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a)
{
return builder.CreateNot(a);
}
#endif
};
@ -364,6 +384,44 @@ public:
block.getByPosition(result).column = std::move(col_res);
}
#if USE_EMBEDDED_COMPILER
bool isCompilableImpl(const DataTypes &) const override { return true; }
llvm::Value * compileImpl(llvm::IRBuilderBase & builder, const DataTypes & types, ValuePlaceholders values) const override
{
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
if constexpr (!Impl::isSaturable())
{
auto * result = nativeBoolCast(b, types[0], values[0]());
for (size_t i = 1; i < types.size(); i++)
result = Impl::apply(b, result, nativeBoolCast(b, types[i], values[i]()));
return b.CreateSelect(result, b.getInt8(1), b.getInt8(0));
}
constexpr bool breakOnTrue = Impl::isSaturatedValue(true);
auto * next = b.GetInsertBlock();
auto * stop = llvm::BasicBlock::Create(next->getContext(), "", next->getParent());
b.SetInsertPoint(stop);
auto * phi = b.CreatePHI(b.getInt8Ty(), values.size());
for (size_t i = 0; i < types.size(); i++)
{
b.SetInsertPoint(next);
auto * value = values[i]();
auto * truth = nativeBoolCast(b, types[i], value);
if (!types[i]->equals(DataTypeUInt8{}))
value = b.CreateSelect(truth, b.getInt8(1), b.getInt8(0));
phi->addIncoming(value, b.GetInsertBlock());
if (i + 1 < types.size())
{
next = llvm::BasicBlock::Create(next->getContext(), "", next->getParent());
b.CreateCondBr(truth, breakOnTrue ? stop : next, breakOnTrue ? next : stop);
}
}
b.CreateBr(stop);
b.SetInsertPoint(stop);
return phi;
}
#endif
};
@ -430,6 +488,16 @@ public:
+ " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
#if USE_EMBEDDED_COMPILER
bool isCompilableImpl(const DataTypes &) const override { return true; }
llvm::Value * compileImpl(llvm::IRBuilderBase & builder, const DataTypes & types, ValuePlaceholders values) const override
{
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
return b.CreateSelect(Impl<UInt8>::apply(b, nativeBoolCast(b, types[0], values[0]())), b.getInt8(1), b.getInt8(0));
}
#endif
};

View File

@ -93,6 +93,10 @@ struct RoundToExp2Impl
{
return roundDownToPowerOfTwo<T>(x);
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false;
#endif
};
@ -120,6 +124,10 @@ struct RoundDurationImpl
: (x < 36000 ? 18000
: 36000))))))))))))));
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false;
#endif
};
template <typename A>
@ -137,6 +145,10 @@ struct RoundAgeImpl
: (x < 55 ? 45
: 55)))));
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = false;
#endif
};

View File

@ -1,15 +1,25 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeNothing.h>
#include <Columns/ColumnConst.h>
#include <Interpreters/ExpressionActions.h>
#include <Common/config.h>
#include <Common/typeid_cast.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/Native.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/ExpressionActions.h>
#include <ext/range.h>
#include <ext/collection_cast.h>
#include <cstdlib>
#include <memory>
#include <optional>
#if USE_EMBEDDED_COMPILER
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <llvm/IR/IRBuilder.h>
#pragma GCC diagnostic pop
#endif
namespace DB
@ -21,6 +31,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
}
namespace
{
@ -259,4 +270,71 @@ DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & ar
return getReturnTypeImpl(arguments);
}
#if USE_EMBEDDED_COMPILER
static std::optional<DataTypes> removeNullables(const DataTypes & types)
{
for (const auto & type : types)
{
if (!typeid_cast<const DataTypeNullable *>(type.get()))
continue;
DataTypes filtered;
for (const auto & type : types)
filtered.emplace_back(removeNullable(type));
return filtered;
}
return {};
}
bool IFunction::isCompilable(const DataTypes & arguments) const
{
if (useDefaultImplementationForNulls())
if (auto denulled = removeNullables(arguments))
return isCompilableImpl(*denulled);
return isCompilableImpl(arguments);
}
llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const DataTypes & arguments, ValuePlaceholders values) const
{
if (useDefaultImplementationForNulls())
{
if (auto denulled = removeNullables(arguments))
{
/// FIXME: when only one column is nullable, this can actually be slower than the non-jitted version
/// because this involves copying the null map while `wrapInNullable` reuses it.
auto & b = static_cast<llvm::IRBuilder<> &>(builder);
auto * fail = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent());
auto * join = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent());
auto * zero = llvm::Constant::getNullValue(toNativeType(b, makeNullable(getReturnTypeImpl(*denulled))));
for (size_t i = 0; i < arguments.size(); i++)
{
if (!arguments[i]->isNullable())
continue;
/// Would be nice to evaluate all this lazily, but that'd change semantics: if only unevaluated
/// arguments happen to contain NULLs, the return value would not be NULL, though it should be.
auto * value = values[i]();
auto * ok = llvm::BasicBlock::Create(b.GetInsertBlock()->getContext(), "", b.GetInsertBlock()->getParent());
b.CreateCondBr(b.CreateExtractValue(value, {1}), fail, ok);
b.SetInsertPoint(ok);
values[i] = [value = b.CreateExtractValue(value, {0})]() { return value; };
}
auto * result = b.CreateInsertValue(zero, compileImpl(builder, *denulled, std::move(values)), {0});
auto * result_block = b.GetInsertBlock();
b.CreateBr(join);
b.SetInsertPoint(fail);
auto * null = b.CreateInsertValue(zero, b.getTrue(), {1});
b.CreateBr(join);
b.SetInsertPoint(join);
auto * phi = b.CreatePHI(result->getType(), 2);
phi->addIncoming(result, result_block);
phi->addIncoming(null, fail);
return phi;
}
}
return compileImpl(builder, arguments, std::move(values));
}
#endif
}

View File

@ -2,6 +2,7 @@
#include <memory>
#include <Common/config.h>
#include <Core/Names.h>
#include <Core/Field.h>
#include <Core/Block.h>
@ -9,6 +10,14 @@
#include <DataTypes/IDataType.h>
namespace llvm
{
class LLVMContext;
class Value;
class IRBuilderBase;
}
namespace DB
{
@ -70,6 +79,8 @@ private:
size_t input_rows_count);
};
using ValuePlaceholders = std::vector<std::function<llvm::Value * ()>>;
/// Function with known arguments and return type.
class IFunctionBase
{
@ -92,6 +103,25 @@ public:
return prepare(block)->execute(block, arguments, result, input_rows_count);
}
#if USE_EMBEDDED_COMPILER
virtual bool isCompilable() const { return false; }
/** Produce LLVM IR code that operates on scalar values. See `toNativeType` in DataTypes/Native.h
* for supported value types and how they map to LLVM types.
*
* NOTE: the builder is actually guaranteed to be exactly `llvm::IRBuilder<>`, so you may safely
* downcast it to that type. This method is specified with `IRBuilderBase` because forward-declaring
* templates with default arguments is impossible and including LLVM in such a generic header
* as this one is a major pain.
*/
virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, ValuePlaceholders /*values*/) const
{
throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED);
}
#endif
/** Should we evaluate this function while constant folding, if arguments are constants?
* Usually this is true. Notable counterexample is function 'sleep'.
* If we will call it during query analysis, we will sleep extra amount of time.
@ -261,7 +291,6 @@ public:
using PreparedFunctionImpl::execute;
using FunctionBuilderImpl::getReturnTypeImpl;
using FunctionBuilderImpl::getLambdaArgumentTypesImpl;
using FunctionBuilderImpl::getReturnType;
PreparedFunctionPtr prepare(const Block & /*sample_block*/) const final
@ -269,17 +298,51 @@ public:
throw Exception("prepare is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED);
}
#if USE_EMBEDDED_COMPILER
bool isCompilable() const final
{
throw Exception("isCompilable without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED);
}
llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, ValuePlaceholders /*values*/) const final
{
throw Exception("compile without explicit types is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED);
}
#endif
const DataTypes & getArgumentTypes() const final
{
throw Exception("getArgumentTypes is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED);
}
const DataTypePtr & getReturnType() const override
const DataTypePtr & getReturnType() const final
{
throw Exception("getReturnType is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED);
}
#if USE_EMBEDDED_COMPILER
bool isCompilable(const DataTypes & arguments) const;
llvm::Value * compile(llvm::IRBuilderBase &, const DataTypes & arguments, ValuePlaceholders values) const;
#endif
protected:
#if USE_EMBEDDED_COMPILER
virtual bool isCompilableImpl(const DataTypes &) const { return false; }
virtual llvm::Value * compileImpl(llvm::IRBuilderBase &, const DataTypes &, ValuePlaceholders) const
{
throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED);
}
#endif
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr & /*return_type*/) const final
{
throw Exception("buildImpl is not implemented for IFunction", ErrorCodes::NOT_IMPLEMENTED);
@ -319,6 +382,14 @@ public:
const DataTypes & getArgumentTypes() const override { return arguments; }
const DataTypePtr & getReturnType() const override { return return_type; }
#if USE_EMBEDDED_COMPILER
bool isCompilable() const override { return function->isCompilable(arguments); }
llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override { return function->compile(builder, arguments, std::move(values)); }
#endif
PreparedFunctionPtr prepare(const Block & /*sample_block*/) const override { return std::make_shared<DefaultExecutable>(function); }
bool isSuitableForConstantFolding() const override { return function->isSuitableForConstantFolding(); }

View File

@ -7,6 +7,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <optional>

View File

@ -3,6 +3,7 @@
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Common/ProfileEvents.h>
#include <errno.h>
namespace ProfileEvents

View File

@ -1,10 +1,9 @@
#include <IO/ReadWriteBufferFromHTTP.h>
#include <Common/SimpleCache.h>
#include <Common/config.h>
#include <Core/Types.h>
#include <IO/ReadBufferFromIStream.h>
#include <Common/DNSCache.h>
#include <Common/DNSResolver.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/Version.h>
@ -43,7 +42,7 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP(const Poco::URI & uri,
new Poco::Net::HTTPClientSession)
}
{
session->setHost(DNSCache::instance().resolveHost(uri.getHost()).toString());
session->setHost(DNSResolver::instance().resolveHost(uri.getHost()).toString());
session->setPort(uri.getPort());
#if POCO_CLICKHOUSE_PATCH || POCO_VERSION >= 0x02000000

View File

@ -1,88 +0,0 @@
#pragma once
#include <IO/ReadWriteBufferFromHTTP.h>
#include "ReadHelpers.h"
#define DEFAULT_REMOTE_READ_BUFFER_CONNECTION_TIMEOUT 1
#define DEFAULT_REMOTE_READ_BUFFER_RECEIVE_TIMEOUT 1800
#define DEFAULT_REMOTE_READ_BUFFER_SEND_TIMEOUT 1800
namespace DB
{
/** Allows you to read a file from a remote server via riod.
*/
class RemoteReadBuffer : public ReadBuffer
{
private:
std::unique_ptr<ReadWriteBufferFromHTTP> impl;
public:
RemoteReadBuffer(
const std::string & host,
int port,
const std::string & path,
bool compress = true,
size_t buffer_size = DBMS_DEFAULT_BUFFER_SIZE,
const Poco::Timespan & connection_timeout = Poco::Timespan(DEFAULT_REMOTE_READ_BUFFER_CONNECTION_TIMEOUT, 0),
const Poco::Timespan & send_timeout = Poco::Timespan(DEFAULT_REMOTE_READ_BUFFER_SEND_TIMEOUT, 0),
const Poco::Timespan & receive_timeout = Poco::Timespan(DEFAULT_REMOTE_READ_BUFFER_RECEIVE_TIMEOUT, 0))
: ReadBuffer(nullptr, 0)
{
Poco::URI uri;
uri.setScheme("http");
uri.setHost(host);
uri.setPort(port);
uri.setQueryParameters(
{
std::make_pair("action", "read"),
std::make_pair("path", path),
std::make_pair("compress", (compress ? "true" : "false"))
});
ConnectionTimeouts timeouts(connection_timeout, send_timeout, receive_timeout);
ReadWriteBufferFromHTTP::OutStreamCallback callback;
impl = std::make_unique<ReadWriteBufferFromHTTP>(uri, std::string(), callback, timeouts, buffer_size);
}
bool nextImpl() override
{
if (!impl->next())
return false;
internal_buffer = impl->buffer();
working_buffer = internal_buffer;
return true;
}
/// Return the list of file names in the directory.
static std::vector<std::string> listFiles(
const std::string & host,
int port,
const std::string & path,
const ConnectionTimeouts & timeouts)
{
Poco::URI uri;
uri.setScheme("http");
uri.setHost(host);
uri.setPort(port);
uri.setQueryParameters(
{
std::make_pair("action", "list"),
std::make_pair("path", path)});
ReadWriteBufferFromHTTP in(uri, {}, {}, timeouts);
std::vector<std::string> files;
while (!in.eof())
{
std::string s;
readString(s, in);
skipWhitespaceIfAny(in);
files.push_back(s);
}
return files;
}
};
}

View File

@ -1,264 +0,0 @@
#pragma once
#include <Poco/Version.h>
#include <Poco/URI.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/NetException.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/HashingWriteBuffer.h>
#include <common/logger_useful.h>
#define DEFAULT_REMOTE_WRITE_BUFFER_CONNECTION_TIMEOUT 1
#define DEFAULT_REMOTE_WRITE_BUFFER_RECEIVE_TIMEOUT 1800
#define DEFAULT_REMOTE_WRITE_BUFFER_SEND_TIMEOUT 1800
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_WRITE_TO_OSTREAM;
extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER;
}
/** Allows you to write a file to a remote server.
*/
class RemoteWriteBuffer : public WriteBuffer
{
private:
std::string host;
int port;
std::string path;
std::string encoded_path;
std::string encoded_tmp_path;
std::string tmp_path;
std::string if_exists;
bool decompress;
unsigned connection_retries;
std::string uri_str;
Poco::Net::HTTPClientSession session;
std::ostream * ostr; /// this is owned by session
std::unique_ptr<WriteBuffer> impl;
/// Have sent all the data and renamed the file
bool finalized;
public:
/** If tmp_path is not empty, it writes first the temporary file, and then renames it,
* deleting existing files, if any.
* Otherwise, if_exists parameter is used.
*/
RemoteWriteBuffer(const std::string & host_, int port_, const std::string & path_,
const std::string & tmp_path_ = "", const std::string & if_exists_ = "remove",
bool decompress_ = false,
unsigned connection_retries_ = 3,
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const Poco::Timespan & connection_timeout = Poco::Timespan(DEFAULT_REMOTE_WRITE_BUFFER_CONNECTION_TIMEOUT, 0),
const Poco::Timespan & send_timeout = Poco::Timespan(DEFAULT_REMOTE_WRITE_BUFFER_SEND_TIMEOUT, 0),
const Poco::Timespan & receive_timeout = Poco::Timespan(DEFAULT_REMOTE_WRITE_BUFFER_RECEIVE_TIMEOUT, 0))
: WriteBuffer(nullptr, 0), host(host_), port(port_), path(path_),
tmp_path(tmp_path_), if_exists(if_exists_),
decompress(decompress_), connection_retries(connection_retries_), finalized(false)
{
Poco::URI::encode(path, "&#", encoded_path);
Poco::URI::encode(tmp_path, "&#", encoded_tmp_path);
std::stringstream uri;
uri << "http://" << host << ":" << port
<< "/?action=write"
<< "&path=" << (tmp_path.empty() ? encoded_path : encoded_tmp_path)
<< "&if_exists=" << if_exists
<< "&decompress=" << (decompress ? "true" : "false");
uri_str = Poco::URI(uri.str()).getPathAndQuery();
session.setHost(host);
session.setPort(port);
session.setKeepAlive(true);
/// set the timeout
#if POCO_CLICKHOUSE_PATCH || POCO_VERSION >= 0x02000000
session.setTimeout(connection_timeout, send_timeout, receive_timeout);
#else
session.setTimeout(connection_timeout);
#endif
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_POST, uri_str, Poco::Net::HTTPRequest::HTTP_1_1);
request.setChunkedTransferEncoding(true);
for (unsigned i = 0; i < connection_retries; ++i)
{
LOG_TRACE((&Logger::get("RemoteWriteBuffer")), "Sending write request to " << host << ":" << port << uri_str);
try
{
ostr = &session.sendRequest(request);
}
catch (const Poco::Net::NetException & e)
{
if (i + 1 == connection_retries)
throw;
LOG_WARNING((&Logger::get("RemoteWriteBuffer")), e.displayText() << ", URL: " << host << ":" << port << uri_str << ", try No " << i + 1 << ".");
session.reset();
continue;
}
catch (const Poco::TimeoutException & e)
{
if (i + 1 == connection_retries)
throw;
LOG_WARNING((&Logger::get("RemoteWriteBuffer")), "Connection timeout from " << host << ":" << port << uri_str << ", try No " << i + 1 << ".");
session.reset();
continue;
}
break;
}
impl = std::make_unique<WriteBufferFromOStream>(*ostr, buffer_size_);
set(impl->buffer().begin(), impl->buffer().size());
}
void nextImpl() override
{
if (!offset() || finalized)
return;
/// For correct work with AsynchronousWriteBuffer, which replaces buffers.
impl->set(buffer().begin(), buffer().size());
impl->position() = pos;
try
{
impl->next();
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_WRITE_TO_OSTREAM)
checkStatus(); /// Change the error message to a clearer one.
throw;
}
}
void finalize()
{
if (finalized)
return;
next();
checkStatus();
/// Rename the file if necessary.
if (!tmp_path.empty())
rename();
finalized = true;
}
void cancel()
{
finalized = true;
}
~RemoteWriteBuffer()
{
try
{
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
private:
void checkStatus()
{
Poco::Net::HTTPResponse response;
std::istream & istr = session.receiveResponse(response);
Poco::Net::HTTPResponse::HTTPStatus status = response.getStatus();
std::stringstream message;
message << istr.rdbuf();
if (status != Poco::Net::HTTPResponse::HTTP_OK || message.str() != "Ok.\n")
{
std::stringstream error_message;
error_message << "Received error from remote server " << uri_str << ", body: " << message.str();
throw Exception(error_message.str(), ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER);
}
}
void rename()
{
std::stringstream uri;
uri << "http://" << host << ":" << port
<< "/?action=rename"
<< "&from=" << encoded_tmp_path
<< "&to=" << encoded_path;
uri_str = Poco::URI(uri.str()).getPathAndQuery();
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, uri_str, Poco::Net::HTTPRequest::HTTP_1_1);
for (unsigned i = 0; i < connection_retries; ++i)
{
LOG_TRACE((&Logger::get("RemoteWriteBuffer")), "Sending rename request to " << host << ":" << port << uri_str);
try
{
session.sendRequest(request);
checkStatus();
}
catch (const Poco::Net::NetException & e)
{
if (i + 1 == connection_retries)
throw;
LOG_WARNING((&Logger::get("RemoteWriteBuffer")), e.what() << ", message: " << e.displayText()
<< ", URL: " << host << ":" << port << uri_str << ", try No " << i + 1 << ".");
session.reset();
continue;
}
catch (const Poco::TimeoutException & e)
{
if (i + 1 == connection_retries)
throw;
LOG_WARNING((&Logger::get("RemoteWriteBuffer")), "Connection timeout from " << host << ":" << port << uri_str << ", try No " << i + 1 << ".");
session.reset();
continue;
}
catch (const Exception & e)
{
/// If in the last attempt we did not receive a response from the server, but the file was renamed already.
if (i != 0 && e.code() == ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER
&& nullptr != strstr(e.displayText().data(), "File not found"))
{
LOG_TRACE((&Logger::get("RemoteWriteBuffer")), "File already renamed");
}
else
throw;
}
break;
}
}
};
}

View File

@ -6,6 +6,7 @@
#include <limits>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
namespace ProfileEvents

View File

@ -1,5 +1,6 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <Common/ProfileEvents.h>

View File

@ -1,58 +1,57 @@
include_directories (${CMAKE_CURRENT_BINARY_DIR})
set(SRCS )
add_executable (read_buffer read_buffer.cpp ${SRCS})
add_executable (read_buffer read_buffer.cpp)
target_link_libraries (read_buffer clickhouse_common_io)
add_executable (read_buffer_perf read_buffer_perf.cpp ${SRCS})
add_executable (read_buffer_perf read_buffer_perf.cpp)
target_link_libraries (read_buffer_perf clickhouse_common_io)
add_executable (read_float_perf read_float_perf.cpp ${SRCS})
add_executable (read_float_perf read_float_perf.cpp)
target_link_libraries (read_float_perf clickhouse_common_io)
add_executable (write_buffer write_buffer.cpp ${SRCS})
add_executable (write_buffer write_buffer.cpp)
target_link_libraries (write_buffer clickhouse_common_io)
add_executable (write_buffer_perf write_buffer_perf.cpp ${SRCS})
add_executable (write_buffer_perf write_buffer_perf.cpp)
target_link_libraries (write_buffer_perf clickhouse_common_io)
add_executable (valid_utf8_perf valid_utf8_perf.cpp ${SRCS})
add_executable (valid_utf8_perf valid_utf8_perf.cpp)
target_link_libraries (valid_utf8_perf clickhouse_common_io)
add_executable (valid_utf8 valid_utf8.cpp ${SRCS})
add_executable (valid_utf8 valid_utf8.cpp)
target_link_libraries (valid_utf8 clickhouse_common_io)
add_executable (compressed_buffer compressed_buffer.cpp ${SRCS})
add_executable (compressed_buffer compressed_buffer.cpp)
target_link_libraries (compressed_buffer clickhouse_common_io)
add_executable (var_uint var_uint.cpp ${SRCS})
add_executable (var_uint var_uint.cpp)
target_link_libraries (var_uint clickhouse_common_io)
add_executable (read_escaped_string read_escaped_string.cpp ${SRCS})
add_executable (read_escaped_string read_escaped_string.cpp)
target_link_libraries (read_escaped_string clickhouse_common_io)
add_executable (async_write async_write.cpp ${SRCS})
add_executable (async_write async_write.cpp)
target_link_libraries (async_write clickhouse_common_io)
add_executable (parse_int_perf parse_int_perf.cpp ${SRCS})
add_executable (parse_int_perf parse_int_perf.cpp)
target_link_libraries (parse_int_perf clickhouse_common_io)
add_executable (parse_int_perf2 parse_int_perf2.cpp ${SRCS})
add_executable (parse_int_perf2 parse_int_perf2.cpp)
target_link_libraries (parse_int_perf2 clickhouse_common_io)
add_executable (read_write_int read_write_int.cpp ${SRCS})
add_executable (read_write_int read_write_int.cpp)
target_link_libraries (read_write_int clickhouse_common_io)
add_executable (mempbrk mempbrk.cpp ${SRCS})
add_executable (mempbrk mempbrk.cpp)
target_link_libraries (mempbrk clickhouse_common_io)
add_executable (cached_compressed_read_buffer cached_compressed_read_buffer.cpp ${SRCS})
add_executable (cached_compressed_read_buffer cached_compressed_read_buffer.cpp)
target_link_libraries (cached_compressed_read_buffer clickhouse_common_io)
add_executable (o_direct_and_dirty_pages o_direct_and_dirty_pages.cpp ${SRCS})
add_executable (o_direct_and_dirty_pages o_direct_and_dirty_pages.cpp)
target_link_libraries (o_direct_and_dirty_pages clickhouse_common_io)
add_executable (hashing_write_buffer hashing_write_buffer.cpp ${SRCS})
add_executable (hashing_write_buffer hashing_write_buffer.cpp)
target_link_libraries (hashing_write_buffer clickhouse_common_io)
add_check(hashing_write_buffer)
@ -60,7 +59,7 @@ add_executable (hashing_read_buffer hashing_read_buffer.cpp)
target_link_libraries (hashing_read_buffer clickhouse_common_io)
add_check (hashing_read_buffer)
add_executable (io_operators operators.cpp ${SRCS})
add_executable (io_operators operators.cpp)
target_link_libraries (io_operators clickhouse_common_io)
if (NOT APPLE AND NOT ARCH_FREEBSD)
@ -71,17 +70,14 @@ if (NOT APPLE AND NOT ARCH_FREEBSD)
target_link_libraries (read_buffer_aio clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY})
endif ()
add_executable (zlib_buffers zlib_buffers.cpp ${SRCS})
add_executable (zlib_buffers zlib_buffers.cpp)
target_link_libraries (zlib_buffers clickhouse_common_io)
add_executable (remote_read_write_buffer remote_read_write_buffer.cpp ${SRCS})
target_link_libraries (remote_read_write_buffer clickhouse_common_io)
add_executable (limit_read_buffer limit_read_buffer.cpp ${SRCS})
add_executable (limit_read_buffer limit_read_buffer.cpp)
target_link_libraries (limit_read_buffer clickhouse_common_io)
add_executable (limit_read_buffer2 limit_read_buffer2.cpp ${SRCS})
add_executable (limit_read_buffer2 limit_read_buffer2.cpp)
target_link_libraries (limit_read_buffer2 clickhouse_common_io)
add_executable (parse_date_time_best_effort parse_date_time_best_effort.cpp ${SRCS})
add_executable (parse_date_time_best_effort parse_date_time_best_effort.cpp)
target_link_libraries (parse_date_time_best_effort clickhouse_common_io)

File diff suppressed because it is too large Load Diff

View File

@ -1,19 +0,0 @@
#include <iostream>
#include <IO/RemoteReadBuffer.h>
#include <IO/RemoteWriteBuffer.h>
int main(int, char **)
try
{
DB::RemoteReadBuffer({}, {}, {});
DB::RemoteWriteBuffer({}, {}, {});
return 0;
}
catch (const DB::Exception & e)
{
std::cerr << e.what() << ", " << e.displayText() << std::endl;
return 1;
}

View File

@ -837,6 +837,7 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants)
Stopwatch watch;
size_t rows = data_variants.size();
Poco::File(params.tmp_path).createDirectories();
auto file = std::make_unique<Poco::TemporaryFile>(params.tmp_path);
const std::string & path = file->path();
WriteBufferFromFile file_buf(path);

View File

@ -1,5 +1,5 @@
#include <Interpreters/Cluster.h>
#include <Common/DNSCache.h>
#include <Common/DNSResolver.h>
#include <Common/escapeForFileName.h>
#include <Common/isLocalAddress.h>
#include <Common/SimpleCache.h>
@ -29,7 +29,7 @@ namespace
/// Default shard weight.
static constexpr UInt32 default_weight = 1;
inline bool isLocal(const Cluster::Address & address, UInt16 clickhouse_port)
inline bool isLocal(const Cluster::Address & address, const Poco::Net::SocketAddress & resolved_address, UInt16 clickhouse_port)
{
/// If there is replica, for which:
/// - its port is the same that the server is listening;
@ -41,13 +41,7 @@ inline bool isLocal(const Cluster::Address & address, UInt16 clickhouse_port)
/// Also, replica is considered non-local, if it has default database set
/// (only reason is to avoid query rewrite).
return address.default_database.empty() && isLocalAddress(address.resolved_address, clickhouse_port);
}
Poco::Net::SocketAddress resolveSocketAddress(const String & host, UInt16 port)
{
return Poco::Net::SocketAddress(DNSCache::instance().resolveHost(host), port);
return address.default_database.empty() && isLocalAddress(resolved_address, clickhouse_port);
}
}
@ -56,15 +50,15 @@ Poco::Net::SocketAddress resolveSocketAddress(const String & host, UInt16 port)
Cluster::Address::Address(Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
UInt16 clickhouse_port = config.getInt("tcp_port", 0);
UInt16 clickhouse_port = static_cast<UInt16>(config.getInt("tcp_port", 0));
host_name = config.getString(config_prefix + ".host");
port = static_cast<UInt16>(config.getInt(config_prefix + ".port"));
resolved_address = resolveSocketAddress(host_name, port);
user = config.getString(config_prefix + ".user", "default");
password = config.getString(config_prefix + ".password", "");
default_database = config.getString(config_prefix + ".default_database", "");
is_local = isLocal(*this, clickhouse_port);
initially_resolved_address = DNSResolver::instance().resolveAddress(host_name, port);
is_local = isLocal(*this, initially_resolved_address, clickhouse_port);
secure = config.getBool(config_prefix + ".secure", false) ? Protocol::Secure::Enable : Protocol::Secure::Disable;
compression = config.getBool(config_prefix + ".compression", true) ? Protocol::Compression::Enable : Protocol::Compression::Disable;
}
@ -74,11 +68,11 @@ Cluster::Address::Address(const String & host_port_, const String & user_, const
: user(user_), password(password_)
{
auto parsed_host_port = parseAddress(host_port_, clickhouse_port);
resolved_address = resolveSocketAddress(parsed_host_port.first, parsed_host_port.second);
host_name = parsed_host_port.first;
port = parsed_host_port.second;
is_local = isLocal(*this, clickhouse_port);
initially_resolved_address = DNSResolver::instance().resolveAddress(parsed_host_port.first, parsed_host_port.second);
is_local = isLocal(*this, initially_resolved_address, clickhouse_port);
}
@ -94,7 +88,16 @@ String Cluster::Address::toString(const String & host_name, UInt16 port)
String Cluster::Address::readableString() const
{
return host_name + ':' + DB::toString(port);
String res;
/// If it looks like IPv6 address add braces to avoid ambiguity in ipv6_host:port notation
if (host_name.find_first_of(':') != std::string::npos && !host_name.empty() && host_name.back() != ']')
res += '[' + host_name + ']';
else
res += host_name;
res += ':' + DB::toString(port);
return res;
}
void Cluster::Address::fromString(const String & host_port_string, String & host_name, UInt16 & port)
@ -113,8 +116,8 @@ String Cluster::Address::toStringFull() const
return
escapeForFileName(user) +
(password.empty() ? "" : (':' + escapeForFileName(password))) + '@' +
escapeForFileName(resolved_address.host().toString()) + ':' +
std::to_string(resolved_address.port()) +
escapeForFileName(host_name) + ':' +
std::to_string(port) +
(default_database.empty() ? "" : ('#' + escapeForFileName(default_database)))
+ ((secure == Protocol::Secure::Enable) ? "+secure" : "");
}
@ -220,7 +223,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
{
ConnectionPoolPtr pool = std::make_shared<ConnectionPool>(
settings.distributed_connections_pool_size,
address.host_name, address.port, address.resolved_address,
address.host_name, address.port,
address.default_database, address.user, address.password,
ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings).getSaturated(settings.max_execution_time),
"server", address.compression, address.secure);
@ -252,7 +255,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
/// in case of internal_replication we will be appending names to dir_name_for_internal_replication
/// In case of internal_replication we will be appending names to dir_name_for_internal_replication
std::string dir_name_for_internal_replication;
auto first = true;
@ -303,7 +306,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
{
auto replica_pool = std::make_shared<ConnectionPool>(
settings.distributed_connections_pool_size,
replica.host_name, replica.port, replica.resolved_address,
replica.host_name, replica.port,
replica.default_database, replica.user, replica.password,
ConnectionTimeouts::getTCPTimeoutsWithFailover(settings).getSaturated(settings.max_execution_time),
"server", replica.compression, replica.secure);
@ -367,7 +370,7 @@ Cluster::Cluster(const Settings & settings, const std::vector<std::vector<String
{
auto replica_pool = std::make_shared<ConnectionPool>(
settings.distributed_connections_pool_size,
replica.host_name, replica.port, replica.resolved_address,
replica.host_name, replica.port,
replica.default_database, replica.user, replica.password,
ConnectionTimeouts::getTCPTimeoutsWithFailover(settings).getSaturated(settings.max_execution_time),
"server", replica.compression, replica.secure);

View File

@ -52,13 +52,15 @@ public:
* </replica>
* </shard>
*/
Poco::Net::SocketAddress resolved_address;
String host_name;
UInt16 port;
String user;
String password;
String default_database; /// this database is selected when no database is specified for Distributed table
/// This database is selected when no database is specified for Distributed table
String default_database;
UInt32 replica_num;
/// The locality is determined at the initialization, and is not changed even if DNS is changed
bool is_local;
Protocol::Compression compression = Protocol::Compression::Enable;
Protocol::Secure secure = Protocol::Secure::Disable;
@ -79,6 +81,15 @@ public:
/// Retrurns escaped user:password@resolved_host_address:resolved_host_port#default_database
String toStringFull() const;
/// Returns initially resolved address
Poco::Net::SocketAddress getResolvedAddress() const
{
return initially_resolved_address;
}
private:
Poco::Net::SocketAddress initially_resolved_address;
};
using Addresses = std::vector<Address>;

View File

@ -14,7 +14,7 @@ BlockExtraInfo toBlockExtraInfo(const Cluster::Address & address)
{
BlockExtraInfo block_extra_info;
block_extra_info.host = address.host_name;
block_extra_info.resolved_address = address.resolved_address.toString();
block_extra_info.resolved_address = address.getResolvedAddress().toString();
block_extra_info.port = address.port;
block_extra_info.user = address.user;
block_extra_info.is_valid = true;

View File

@ -39,7 +39,7 @@
#include <Interpreters/QueryLog.h>
#include <Interpreters/PartLog.h>
#include <Interpreters/Context.h>
#include <Common/DNSCache.h>
#include <Common/DNSResolver.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/UncompressedCache.h>
#include <Parsers/ASTCreateQuery.h>
@ -1405,10 +1405,29 @@ std::shared_ptr<Cluster> Context::tryGetCluster(const std::string & cluster_name
void Context::reloadClusterConfig()
{
while (true)
{
ConfigurationPtr cluster_config;
{
std::lock_guard<std::mutex> lock(shared->clusters_mutex);
auto & config = shared->clusters_config ? *shared->clusters_config : getConfigRef();
shared->clusters = std::make_unique<Clusters>(config, settings);
cluster_config = shared->clusters_config;
}
auto & config = cluster_config ? *cluster_config : getConfigRef();
auto new_clusters = std::make_unique<Clusters>(config, settings);
{
std::lock_guard<std::mutex> lock(shared->clusters_mutex);
if (shared->clusters_config.get() == cluster_config.get())
{
shared->clusters = std::move(new_clusters);
return;
}
/// Clusters config has been suddenly changed, recompute clusters
}
}
}

View File

@ -16,7 +16,7 @@
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Common/DNSCache.h>
#include <Common/DNSResolver.h>
#include <Common/Macros.h>
#include <Common/getFQDNOrHostName.h>
@ -39,6 +39,7 @@
#include <random>
#include <pcg_random.hpp>
#include <Poco/Net/NetException.h>
namespace DB
@ -93,9 +94,9 @@ struct HostID
{
try
{
return DB::isLocalAddress(Poco::Net::SocketAddress(host_name, port), clickhouse_port);
return DB::isLocalAddress(DNSResolver::instance().resolveAddress(host_name, port), clickhouse_port);
}
catch (const Poco::Exception & e)
catch (const Poco::Net::NetException & e)
{
/// Avoid "Host not found" exceptions
return false;
@ -480,7 +481,7 @@ void DDLWorker::parseQueryAndResolveHost(DDLTask & task)
{
const Cluster::Address & address = shards[shard_num][replica_num];
if (isLocalAddress(address.resolved_address, context.getTCPPort()))
if (isLocalAddress(address.getResolvedAddress(), context.getTCPPort()))
{
if (found_via_resolving)
{
@ -634,43 +635,51 @@ void DDLWorker::processTaskAlter(
if (execute_once_on_replica && !config_is_replicated_shard)
{
throw Exception("Table " + ast_alter->table + " is replicated, but shard #" + toString(task.host_shard_num + 1) +
" isn't replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
" isn't replicated according to its cluster definition."
" Possibly <internal_replication>true</internal_replication> is forgotten in the cluster config.",
ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
}
else if (!execute_once_on_replica && config_is_replicated_shard)
if (!execute_once_on_replica && config_is_replicated_shard)
{
throw Exception("Table " + ast_alter->table + " isn't replicated, but shard #" + toString(task.host_shard_num + 1) +
" is replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
}
if (execute_once_on_replica)
{
/// Generate unique name for shard node, it will be used to execute the query by only single host
/// Shard node name has format 'replica_name1,replica_name2,...,replica_nameN'
/// Where replica_name is 'escape(replica_ip_address):replica_port'
/// FIXME: this replica_name could be changed after replica restart
/// Where replica_name is 'replica_config_host_name:replica_port'
auto get_shard_name = [] (const Cluster::Addresses & shard_addresses)
{
Strings replica_names;
for (const Cluster::Address & address : task.cluster->getShardsAddresses().at(task.host_shard_num))
replica_names.emplace_back(address.resolved_address.host().toString());
for (const Cluster::Address & address : shard_addresses)
replica_names.emplace_back(address.readableString());
std::sort(replica_names.begin(), replica_names.end());
String shard_node_name;
String res;
for (auto it = replica_names.begin(); it != replica_names.end(); ++it)
shard_node_name += *it + (std::next(it) != replica_names.end() ? "," : "");
res += *it + (std::next(it) != replica_names.end() ? "," : "");
return res;
};
if (execute_once_on_replica)
{
String shard_node_name = get_shard_name(task.cluster->getShardsAddresses().at(task.host_shard_num));
String shard_path = node_path + "/shards/" + shard_node_name;
String is_executed_path = shard_path + "/executed";
zookeeper->createAncestors(shard_path + "/");
bool alter_executed_by_any_replica = false;
bool is_executed_by_any_replica = false;
{
auto lock = createSimpleZooKeeperLock(zookeeper, shard_path, "lock", task.host_id_str);
pcg64 rng(randomSeed());
for (int num_tries = 0; num_tries < 10; ++num_tries)
static const size_t max_tries = 20;
for (size_t num_tries = 0; num_tries < max_tries; ++num_tries)
{
if (zookeeper->exists(is_executed_path))
{
alter_executed_by_any_replica = true;
is_executed_by_any_replica = true;
break;
}
@ -685,16 +694,19 @@ void DDLWorker::processTaskAlter(
zookeeper->create(is_executed_path, task.host_id_str, zkutil::CreateMode::Persistent);
lock->unlock();
alter_executed_by_any_replica = true;
is_executed_by_any_replica = true;
break;
}
std::this_thread::sleep_for(std::chrono::duration<double>(std::uniform_real_distribution<double>(0, 1)(rng)));
std::this_thread::sleep_for(std::chrono::milliseconds(std::uniform_int_distribution<long>(0, 1000)(rng)));
}
}
if (!alter_executed_by_any_replica)
task.execution_status = ExecutionStatus(ErrorCodes::NOT_IMPLEMENTED, "Cannot enqueue replicated DDL query");
if (!is_executed_by_any_replica)
{
task.execution_status = ExecutionStatus(ErrorCodes::NOT_IMPLEMENTED,
"Cannot enqueue replicated DDL query for a replicated shard");
}
}
else
{

View File

@ -0,0 +1,118 @@
#include "DNSCacheUpdater.h"
#include <Common/DNSResolver.h>
#include <Interpreters/Context.h>
#include <Storages/MergeTree/BackgroundProcessingPool.h>
#include <Common/ProfileEvents.h>
#include <Poco/Net/NetException.h>
#include <common/logger_useful.h>
namespace ProfileEvents
{
extern Event NetworkErrors;
}
namespace DB
{
using BackgroundProcessingPoolTaskInfo = BackgroundProcessingPool::TaskInfo;
namespace ErrorCodes
{
extern const int TIMEOUT_EXCEEDED;
extern const int ALL_CONNECTION_TRIES_FAILED;
}
/// Call it inside catch section
/// Returns true if it is a network error
static bool isNetworkError()
{
try
{
throw;
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::TIMEOUT_EXCEEDED || e.code() == ErrorCodes::ALL_CONNECTION_TRIES_FAILED)
return true;
}
catch (Poco::Net::DNSException & e)
{
return true;
}
catch (Poco::TimeoutException & e)
{
return true;
}
catch (...)
{
/// Do nothing
}
return false;
}
DNSCacheUpdater::DNSCacheUpdater(Context & context_)
: context(context_), pool(context_.getBackgroundPool())
{
task_handle = pool.addTask([this] () { return run(); });
}
bool DNSCacheUpdater::run()
{
/// TODO: Ensusre that we get global counter (not thread local)
auto num_current_network_exceptions = ProfileEvents::counters[ProfileEvents::NetworkErrors].load(std::memory_order_relaxed);
if (num_current_network_exceptions >= last_num_network_erros + min_errors_to_update_cache
&& time(nullptr) > last_update_time + min_update_period_seconds)
{
try
{
LOG_INFO(&Poco::Logger::get("DNSCacheUpdater"), "Updating DNS cache");
DNSResolver::instance().dropCache();
context.reloadClusterConfig();
last_num_network_erros = num_current_network_exceptions;
last_update_time = time(nullptr);
return true;
}
catch (...)
{
/// Do not increment ProfileEvents::NetworkErrors twice
if (isNetworkError())
return false;
throw;
}
}
/// According to BackgroundProcessingPool logic, if task has done work, it could be executed again immediately.
return false;
}
DNSCacheUpdater::~DNSCacheUpdater()
{
if (task_handle)
pool.removeTask(task_handle);
task_handle.reset();
}
bool DNSCacheUpdater::incrementNetworkErrorEventsIfNeeded()
{
if (isNetworkError())
{
ProfileEvents::increment(ProfileEvents::NetworkErrors);
return true;
}
return false;
}
}

View File

@ -0,0 +1,38 @@
#pragma once
#include <memory>
namespace DB
{
class Context;
class BackgroundProcessingPool;
class BackgroundProcessingPoolTaskInfo;
/// Add a task to BackgroundProcessingPool that watch for ProfileEvents::NetworkErrors and updates DNS cache if it has increased
class DNSCacheUpdater
{
public:
explicit DNSCacheUpdater(Context & context);
~DNSCacheUpdater();
/// Checks if it is a network error and increments ProfileEvents::NetworkErrors
static bool incrementNetworkErrorEventsIfNeeded();
private:
bool run();
Context & context;
BackgroundProcessingPool & pool;
std::shared_ptr<BackgroundProcessingPoolTaskInfo> task_handle;
size_t last_num_network_erros = 0;
time_t last_update_time = 0;
static constexpr size_t min_errors_to_update_cache = 3;
static constexpr time_t min_update_period_seconds = 45;
};
}

View File

@ -1,5 +1,7 @@
#include <Common/config.h>
#include <Common/ProfileEvents.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionJIT.h>
#include <Interpreters/Join.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnArray.h>
@ -752,6 +754,13 @@ void ExpressionActions::finalize(const Names & output_columns)
final_columns.insert(name);
}
#if USE_EMBEDDED_COMPILER
/// This has to be done before removing redundant actions and inserting REMOVE_COLUMNs
/// because inlining may change dependency sets.
if (settings.compile_expressions)
compileFunctions(actions, output_columns, sample_block);
#endif
/// Which columns are needed to perform actions from the current to the last.
NameSet needed_columns = final_columns;
/// Which columns nobody will touch from the current action to the last.
@ -934,7 +943,7 @@ void ExpressionActions::finalize(const Names & output_columns)
std::cerr << action.toString() << "\n";
std::cerr << "\n";*/
optimize();
optimizeArrayJoin();
checkLimits(sample_block);
}
@ -959,11 +968,6 @@ std::string ExpressionActions::dumpActions() const
return ss.str();
}
void ExpressionActions::optimize()
{
optimizeArrayJoin();
}
void ExpressionActions::optimizeArrayJoin()
{
const size_t NONE = actions.size();

View File

@ -215,8 +215,6 @@ private:
void addImpl(ExpressionAction action, Names & new_names);
/// Try to improve something without changing the lists of input and output columns.
void optimize();
/// Move all arrayJoin as close as possible to the end.
void optimizeArrayJoin();
};

View File

@ -0,0 +1,652 @@
#include <Interpreters/ExpressionJIT.h>
#if USE_EMBEDDED_COMPILER
#include <optional>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnVector.h>
#include <Common/typeid_cast.h>
#include <Common/ProfileEvents.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/Native.h>
#include <Functions/IFunction.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/Config/llvm-config.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/DataLayout.h>
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Mangler.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITSymbol.h>
#include <llvm/ExecutionEngine/SectionMemoryManager.h>
#include <llvm/ExecutionEngine/Orc/CompileUtils.h>
#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/MC/SubtargetFeature.h>
#include <llvm/Support/DynamicLibrary.h>
#include <llvm/Support/Host.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#pragma GCC diagnostic pop
namespace ProfileEvents
{
extern const Event CompileFunction;
}
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int CANNOT_COMPILE_CODE;
}
namespace
{
struct ColumnData
{
const char * data = nullptr;
const char * null = nullptr;
size_t stride;
};
struct ColumnDataPlaceholder
{
llvm::Value * data_init; /// first row
llvm::Value * null_init;
llvm::Value * stride;
llvm::PHINode * data; /// current row
llvm::PHINode * null;
};
}
static ColumnData getColumnData(const IColumn * column)
{
ColumnData result;
const bool is_const = column->isColumnConst();
if (is_const)
column = &reinterpret_cast<const ColumnConst *>(column)->getDataColumn();
if (auto * nullable = typeid_cast<const ColumnNullable *>(column))
{
result.null = nullable->getNullMapColumn().getRawData().data;
column = &nullable->getNestedColumn();
}
result.data = column->getRawData().data;
result.stride = is_const ? 0 : column->sizeOfValueIfFixed();
return result;
}
static void applyFunction(IFunctionBase & function, Field & value)
{
const auto & type = function.getArgumentTypes().at(0);
Block block = {{ type->createColumnConst(1, value), type, "x" }, { nullptr, function.getReturnType(), "y" }};
function.execute(block, {0}, 1, 1);
block.safeGetByPosition(1).column->get(0, value);
}
static llvm::TargetMachine * getNativeMachine()
{
std::string error;
auto cpu = llvm::sys::getHostCPUName();
auto triple = llvm::sys::getProcessTriple();
auto target = llvm::TargetRegistry::lookupTarget(triple, error);
if (!target)
throw Exception("Could not initialize native target: " + error, ErrorCodes::CANNOT_COMPILE_CODE);
llvm::SubtargetFeatures features;
llvm::StringMap<bool> feature_map;
if (llvm::sys::getHostCPUFeatures(feature_map))
for (auto& f : feature_map)
features.AddFeature(f.first(), f.second);
llvm::TargetOptions options;
return target->createTargetMachine(
triple, cpu, features.getString(), options, llvm::None,
#if LLVM_VERSION_MAJOR >= 6
llvm::None, llvm::CodeGenOpt::Default, /*jit=*/true
#else
llvm::CodeModel::Default, llvm::CodeGenOpt::Default
#endif
);
}
#if LLVM_VERSION_MAJOR >= 7
auto wrapJITSymbolResolver(llvm::JITSymbolResolver & jsr)
{
auto flags = [&](llvm::orc::SymbolFlagsMap & flags, const llvm::orc::SymbolNameSet & symbols)
{
llvm::orc::SymbolNameSet missing;
for (const auto & symbol : symbols)
{
auto resolved = jsr.lookupFlags({*symbol});
if (resolved && resolved->size())
flags.emplace(symbol, resolved->begin()->second);
else
missing.emplace(symbol);
}
return missing;
};
auto symbols = [&](std::shared_ptr<llvm::orc::AsynchronousSymbolQuery> query, llvm::orc::SymbolNameSet symbols)
{
llvm::orc::SymbolNameSet missing;
for (const auto & symbol : symbols)
{
auto resolved = jsr.lookup({*symbol});
if (resolved && resolved->size())
query->resolve(symbol, resolved->begin()->second);
else
missing.emplace(symbol);
}
return missing;
};
return llvm::orc::createSymbolResolver(flags, symbols);
}
#endif
struct LLVMContext
{
llvm::LLVMContext context;
#if LLVM_VERSION_MAJOR >= 7
llvm::orc::ExecutionSession execution_session;
std::unique_ptr<llvm::Module> module;
#else
std::shared_ptr<llvm::Module> module;
#endif
std::unique_ptr<llvm::TargetMachine> machine;
std::shared_ptr<llvm::SectionMemoryManager> memory_manager;
llvm::orc::RTDyldObjectLinkingLayer object_layer;
llvm::orc::IRCompileLayer<decltype(object_layer), llvm::orc::SimpleCompiler> compile_layer;
llvm::DataLayout layout;
llvm::IRBuilder<> builder;
std::unordered_map<std::string, void *> symbols;
LLVMContext()
#if LLVM_VERSION_MAJOR >= 7
: module(std::make_unique<llvm::Module>("jit", context))
#else
: module(std::make_shared<llvm::Module>("jit", context))
#endif
, machine(getNativeMachine())
, memory_manager(std::make_shared<llvm::SectionMemoryManager>())
#if LLVM_VERSION_MAJOR >= 7
, object_layer(execution_session, [this](llvm::orc::VModuleKey)
{
return llvm::orc::RTDyldObjectLinkingLayer::Resources{memory_manager, wrapJITSymbolResolver(*memory_manager)};
})
#else
, object_layer([this]() { return memory_manager; })
#endif
, compile_layer(object_layer, llvm::orc::SimpleCompiler(*machine))
, layout(machine->createDataLayout())
, builder(context)
{
module->setDataLayout(layout);
module->setTargetTriple(machine->getTargetTriple().getTriple());
}
void finalize()
{
if (!module->size())
return;
llvm::PassManagerBuilder builder;
llvm::legacy::PassManager mpm;
llvm::legacy::FunctionPassManager fpm(module.get());
builder.OptLevel = 3;
builder.SLPVectorize = true;
builder.LoopVectorize = true;
builder.RerollLoops = true;
builder.VerifyInput = true;
builder.VerifyOutput = true;
machine->adjustPassManager(builder);
fpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
mpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
builder.populateFunctionPassManager(fpm);
builder.populateModulePassManager(mpm);
fpm.doInitialization();
for (auto & function : *module)
fpm.run(function);
fpm.doFinalization();
mpm.run(*module);
std::vector<std::string> functions;
functions.reserve(module->size());
for (const auto & function : *module)
functions.emplace_back(function.getName());
#if LLVM_VERSION_MAJOR >= 7
llvm::orc::VModuleKey module_key = execution_session.allocateVModule();
if (compile_layer.addModule(module_key, std::move(module)))
throw Exception("Cannot add module to compile layer", ErrorCodes::CANNOT_COMPILE_CODE);
#else
if (!compile_layer.addModule(module, memory_manager))
throw Exception("Cannot add module to compile layer", ErrorCodes::CANNOT_COMPILE_CODE);
#endif
for (const auto & name : functions)
{
std::string mangled_name;
llvm::raw_string_ostream mangled_name_stream(mangled_name);
llvm::Mangler::getNameWithPrefix(mangled_name_stream, name, layout);
mangled_name_stream.flush();
auto symbol = compile_layer.findSymbol(mangled_name, false);
if (!symbol)
continue; /// external function (e.g. an intrinsic that calls into libc)
auto address = symbol.getAddress();
if (!address)
throw Exception("Function " + name + " failed to link", ErrorCodes::CANNOT_COMPILE_CODE);
symbols[name] = reinterpret_cast<void *>(*address);
}
}
};
class LLVMPreparedFunction : public PreparedFunctionImpl
{
std::string name;
std::shared_ptr<LLVMContext> context;
void * function;
public:
LLVMPreparedFunction(std::string name_, std::shared_ptr<LLVMContext> context)
: name(std::move(name_)), context(context), function(context->symbols.at(name))
{}
String getName() const override { return name; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t block_size) override
{
auto col_res = block.getByPosition(result).type->createColumn()->cloneResized(block_size);
if (block_size)
{
std::vector<ColumnData> columns(arguments.size() + 1);
for (size_t i = 0; i < arguments.size(); ++i)
{
auto * column = block.getByPosition(arguments[i]).column.get();
if (!column)
throw Exception("Column " + block.getByPosition(arguments[i]).name + " is missing", ErrorCodes::LOGICAL_ERROR);
columns[i] = getColumnData(column);
}
columns[arguments.size()] = getColumnData(col_res.get());
reinterpret_cast<void (*) (size_t, ColumnData *)>(function)(block_size, columns.data());
}
block.getByPosition(result).column = std::move(col_res);
};
};
static void compileFunction(std::shared_ptr<LLVMContext> & context, const IFunctionBase & f)
{
ProfileEvents::increment(ProfileEvents::CompileFunction);
auto & arg_types = f.getArgumentTypes();
auto & b = context->builder;
auto * size_type = b.getIntNTy(sizeof(size_t) * 8);
auto * data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy(), size_type);
auto * func_type = llvm::FunctionType::get(b.getVoidTy(), { size_type, data_type->getPointerTo() }, /*isVarArg=*/false);
auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, f.getName(), context->module.get());
auto args = func->args().begin();
llvm::Value * counter_arg = &*args++;
llvm::Value * columns_arg = &*args++;
auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", func);
b.SetInsertPoint(entry);
std::vector<ColumnDataPlaceholder> columns(arg_types.size() + 1);
for (size_t i = 0; i <= arg_types.size(); ++i)
{
auto & type = i == arg_types.size() ? f.getReturnType() : arg_types[i];
auto * data = b.CreateLoad(b.CreateConstInBoundsGEP1_32(data_type, columns_arg, i));
columns[i].data_init = b.CreatePointerCast(b.CreateExtractValue(data, {0}), toNativeType(b, removeNullable(type))->getPointerTo());
columns[i].null_init = type->isNullable() ? b.CreateExtractValue(data, {1}) : nullptr;
columns[i].stride = b.CreateExtractValue(data, {2});
}
/// assume nonzero initial value in `counter_arg`
auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", func);
b.CreateBr(loop);
b.SetInsertPoint(loop);
auto * counter_phi = b.CreatePHI(counter_arg->getType(), 2);
counter_phi->addIncoming(counter_arg, entry);
for (auto & col : columns)
{
col.data = b.CreatePHI(col.data_init->getType(), 2);
col.data->addIncoming(col.data_init, entry);
if (col.null_init)
{
col.null = b.CreatePHI(col.null_init->getType(), 2);
col.null->addIncoming(col.null_init, entry);
}
}
ValuePlaceholders arguments(arg_types.size());
for (size_t i = 0; i < arguments.size(); ++i)
{
arguments[i] = [&b, &col = columns[i], &type = arg_types[i]]() -> llvm::Value *
{
auto * value = b.CreateLoad(col.data);
if (!col.null)
return value;
auto * is_null = b.CreateICmpNE(b.CreateLoad(col.null), b.getInt8(0));
auto * nullable = llvm::Constant::getNullValue(toNativeType(b, type));
return b.CreateInsertValue(b.CreateInsertValue(nullable, value, {0}), is_null, {1});
};
}
auto * result = f.compile(b, std::move(arguments));
if (columns.back().null)
{
b.CreateStore(b.CreateExtractValue(result, {0}), columns.back().data);
b.CreateStore(b.CreateSelect(b.CreateExtractValue(result, {1}), b.getInt8(1), b.getInt8(0)), columns.back().null);
}
else
{
b.CreateStore(result, columns.back().data);
}
auto * cur_block = b.GetInsertBlock();
for (auto & col : columns)
{
/// stride is either 0 or size of native type; output column is never constant; neither is at least one input
auto * is_const = &col == &columns.back() || columns.size() <= 2 ? b.getFalse() : b.CreateICmpEQ(col.stride, llvm::ConstantInt::get(size_type, 0));
col.data->addIncoming(b.CreateSelect(is_const, col.data, b.CreateConstInBoundsGEP1_32(nullptr, col.data, 1)), cur_block);
if (col.null)
col.null->addIncoming(b.CreateSelect(is_const, col.null, b.CreateConstInBoundsGEP1_32(nullptr, col.null, 1)), cur_block);
}
counter_phi->addIncoming(b.CreateSub(counter_phi, llvm::ConstantInt::get(size_type, 1)), cur_block);
auto * end = llvm::BasicBlock::Create(b.getContext(), "end", func);
b.CreateCondBr(b.CreateICmpNE(counter_phi, llvm::ConstantInt::get(size_type, 1)), loop, end);
b.SetInsertPoint(end);
b.CreateRetVoid();
}
static llvm::Constant * getNativeValue(llvm::Type * type, const IColumn & column, size_t i)
{
if (!type)
return nullptr;
if (auto * constant = typeid_cast<const ColumnConst *>(&column))
return getNativeValue(type, constant->getDataColumn(), 0);
if (auto * nullable = typeid_cast<const ColumnNullable *>(&column))
{
auto * value = getNativeValue(type->getContainedType(0), nullable->getNestedColumn(), i);
auto * is_null = llvm::ConstantInt::get(type->getContainedType(1), nullable->isNullAt(i));
return value ? llvm::ConstantStruct::get(static_cast<llvm::StructType *>(type), value, is_null) : nullptr;
}
if (type->isFloatTy())
return llvm::ConstantFP::get(type, static_cast<const ColumnVector<Float32> &>(column).getElement(i));
if (type->isDoubleTy())
return llvm::ConstantFP::get(type, static_cast<const ColumnVector<Float64> &>(column).getElement(i));
if (type->isIntegerTy())
return llvm::ConstantInt::get(type, column.getUInt(i));
/// TODO: if (type->isVectorTy())
return nullptr;
}
/// Same as IFunctionBase::compile, but also for constants and input columns.
using CompilableExpression = std::function<llvm::Value * (llvm::IRBuilderBase &, const ValuePlaceholders &)>;
static CompilableExpression subexpression(ColumnPtr c, DataTypePtr type)
{
return [=](llvm::IRBuilderBase & b, const ValuePlaceholders &) { return getNativeValue(toNativeType(b, type), *c, 0); };
}
static CompilableExpression subexpression(size_t i)
{
return [=](llvm::IRBuilderBase &, const ValuePlaceholders & inputs) { return inputs[i](); };
}
static CompilableExpression subexpression(const IFunctionBase & f, std::vector<CompilableExpression> args)
{
return [&, args = std::move(args)](llvm::IRBuilderBase & builder, const ValuePlaceholders & inputs)
{
ValuePlaceholders input;
for (const auto & arg : args)
input.push_back([&]() { return arg(builder, inputs); });
auto * result = f.compile(builder, input);
if (result->getType() != toNativeType(builder, f.getReturnType()))
throw Exception("Function " + f.getName() + " generated an llvm::Value of invalid type", ErrorCodes::LOGICAL_ERROR);
return result;
};
}
class LLVMFunction : public IFunctionBase
{
std::string name;
Names arg_names;
DataTypes arg_types;
std::shared_ptr<LLVMContext> context;
std::vector<FunctionBasePtr> originals;
std::unordered_map<StringRef, CompilableExpression> subexpressions;
public:
LLVMFunction(const ExpressionActions::Actions & actions, std::shared_ptr<LLVMContext> context, const Block & sample_block)
: name(actions.back().result_name), context(context)
{
for (const auto & c : sample_block)
/// TODO: implement `getNativeValue` for all types & replace the check with `c.column && toNativeType(...)`
if (c.column && getNativeValue(toNativeType(context->builder, c.type), *c.column, 0))
subexpressions[c.name] = subexpression(c.column, c.type);
for (const auto & action : actions)
{
const auto & names = action.argument_names;
const auto & types = action.function->getArgumentTypes();
std::vector<CompilableExpression> args;
for (size_t i = 0; i < names.size(); ++i)
{
auto inserted = subexpressions.emplace(names[i], subexpression(arg_names.size()));
if (inserted.second)
{
arg_names.push_back(names[i]);
arg_types.push_back(types[i]);
}
args.push_back(inserted.first->second);
}
subexpressions[action.result_name] = subexpression(*action.function, std::move(args));
originals.push_back(action.function);
}
compileFunction(context, *this);
}
bool isCompilable() const override { return true; }
llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override { return subexpressions.at(name)(builder, values); }
String getName() const override { return name; }
const Names & getArgumentNames() const { return arg_names; }
const DataTypes & getArgumentTypes() const override { return arg_types; }
const DataTypePtr & getReturnType() const override { return originals.back()->getReturnType(); }
PreparedFunctionPtr prepare(const Block &) const override { return std::make_shared<LLVMPreparedFunction>(name, context); }
bool isDeterministic() override
{
for (const auto & f : originals)
if (!f->isDeterministic())
return false;
return true;
}
bool isDeterministicInScopeOfQuery() override
{
for (const auto & f : originals)
if (!f->isDeterministicInScopeOfQuery())
return false;
return true;
}
bool isSuitableForConstantFolding() const override
{
for (const auto & f : originals)
if (!f->isSuitableForConstantFolding())
return false;
return true;
}
bool isInjective(const Block & sample_block) override
{
for (const auto & f : originals)
if (!f->isInjective(sample_block))
return false;
return true;
}
bool hasInformationAboutMonotonicity() const override
{
for (const auto & f : originals)
if (!f->hasInformationAboutMonotonicity())
return false;
return true;
}
Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override
{
const IDataType * type_ = &type;
Field left_ = left;
Field right_ = right;
Monotonicity result(true, true, true);
/// monotonicity is only defined for unary functions, so the chain must describe a sequence of nested calls
for (size_t i = 0; i < originals.size(); ++i)
{
Monotonicity m = originals[i]->getMonotonicityForRange(*type_, left_, right_);
if (!m.is_monotonic)
return m;
result.is_positive ^= !m.is_positive;
result.is_always_monotonic &= m.is_always_monotonic;
if (i + 1 < originals.size())
{
if (left_ != Field())
applyFunction(*originals[i], left_);
if (right_ != Field())
applyFunction(*originals[i], right_);
if (!m.is_positive)
std::swap(left_, right_);
type_ = originals[i]->getReturnType().get();
}
}
return result;
}
};
static bool isCompilable(llvm::IRBuilderBase & builder, const IFunctionBase& function)
{
if (!toNativeType(builder, function.getReturnType()))
return false;
for (const auto & type : function.getArgumentTypes())
if (!toNativeType(builder, type))
return false;
return function.isCompilable();
}
void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block)
{
struct LLVMTargetInitializer
{
LLVMTargetInitializer()
{
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
}
};
static LLVMTargetInitializer initializer;
auto context = std::make_shared<LLVMContext>();
/// an empty optional is a poisoned value prohibiting the column's producer from being removed
/// (which it could be, if it was inlined into every dependent function).
std::unordered_map<std::string, std::unordered_set<std::optional<size_t>>> current_dependents;
for (const auto & name : output_columns)
current_dependents[name].emplace();
/// a snapshot of each compilable function's dependents at the time of its execution.
std::vector<std::unordered_set<std::optional<size_t>>> dependents(actions.size());
for (size_t i = actions.size(); i--;)
{
switch (actions[i].type)
{
case ExpressionAction::REMOVE_COLUMN:
current_dependents.erase(actions[i].source_name);
/// poison every other column used after this point so that inlining chains do not cross it.
for (auto & dep : current_dependents)
dep.second.emplace();
break;
case ExpressionAction::PROJECT:
current_dependents.clear();
for (const auto & proj : actions[i].projection)
current_dependents[proj.first].emplace();
break;
case ExpressionAction::ADD_COLUMN:
case ExpressionAction::COPY_COLUMN:
case ExpressionAction::ARRAY_JOIN:
case ExpressionAction::JOIN:
{
Names columns = actions[i].getNeededColumns();
for (const auto & column : columns)
current_dependents[column].emplace();
break;
}
case ExpressionAction::APPLY_FUNCTION:
{
dependents[i] = current_dependents[actions[i].result_name];
const bool compilable = isCompilable(context->builder, *actions[i].function);
for (const auto & name : actions[i].argument_names)
{
if (compilable)
current_dependents[name].emplace(i);
else
current_dependents[name].emplace();
}
break;
}
}
}
std::vector<ExpressionActions::Actions> fused(actions.size());
for (size_t i = 0; i < actions.size(); ++i)
{
if (actions[i].type != ExpressionAction::APPLY_FUNCTION || !isCompilable(context->builder, *actions[i].function))
continue;
fused[i].push_back(actions[i]);
if (dependents[i].find({}) != dependents[i].end())
{
/// the result of compiling one function in isolation is pretty much the same as its `execute` method.
if (fused[i].size() == 1)
continue;
auto fn = std::make_shared<LLVMFunction>(std::move(fused[i]), context, sample_block);
actions[i].function = fn;
actions[i].argument_names = fn->getArgumentNames();
continue;
}
/// TODO: determine whether it's profitable to inline the function if there's more than one dependent.
for (const auto & dep : dependents[i])
fused[*dep].insert(fused[*dep].end(), fused[i].begin(), fused[i].end());
}
context->finalize();
}
}
#endif

View File

@ -0,0 +1,18 @@
#pragma once
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER
#include <Interpreters/ExpressionActions.h>
namespace DB
{
/// For each APPLY_FUNCTION action, try to compile the function to native code; if the only uses of a compilable
/// function's result are as arguments to other compilable functions, inline it and leave the now-redundant action as-is.
void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block);
}
#endif

View File

@ -50,7 +50,6 @@ namespace ErrorCodes
extern const int EMPTY_LIST_OF_COLUMNS_PASSED;
extern const int INCORRECT_QUERY;
extern const int ENGINE_REQUIRED;
extern const int TABLE_METADATA_ALREADY_EXISTS;
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int DUPLICATE_COLUMN;
extern const int READONLY;

View File

@ -1,5 +1,5 @@
#include <Interpreters/InterpreterSystemQuery.h>
#include <Common/DNSCache.h>
#include <Common/DNSResolver.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionaries.h>
#include <Interpreters/EmbeddedDictionaries.h>
@ -63,6 +63,10 @@ BlockIO InterpreterSystemQuery::execute()
using Type = ASTSystemQuery::Type;
/// Use global context with fresh system profile settings
Context system_context = context.getGlobalContext();
system_context.setSetting("profile", context.getSystemProfileName());
switch (query.type)
{
case Type::SHUTDOWN:
@ -74,31 +78,34 @@ BlockIO InterpreterSystemQuery::execute()
throwFromErrno("System call kill(0, SIGKILL) failed", ErrorCodes::CANNOT_KILL);
break;
case Type::DROP_DNS_CACHE:
DNSCache::instance().drop();
DNSResolver::instance().dropCache();
/// Reinitialize clusters to update their resolved_addresses
context.reloadClusterConfig();
system_context.reloadClusterConfig();
break;
case Type::DROP_MARK_CACHE:
context.dropMarkCache();
system_context.dropMarkCache();
break;
case Type::DROP_UNCOMPRESSED_CACHE:
context.dropUncompressedCache();
system_context.dropUncompressedCache();
break;
case Type::RELOAD_DICTIONARY:
context.getExternalDictionaries().reloadDictionary(query.target_dictionary);
system_context.getExternalDictionaries().reloadDictionary(query.target_dictionary);
break;
case Type::RELOAD_DICTIONARIES:
{
auto status = getOverallExecutionStatusOfCommands(
[&] { context.getExternalDictionaries().reload(); },
[&] { context.getEmbeddedDictionaries().reload(); }
[&] { system_context.getExternalDictionaries().reload(); },
[&] { system_context.getEmbeddedDictionaries().reload(); }
);
if (status.code != 0)
throw Exception(status.message, status.code);
break;
}
case Type::RELOAD_EMBEDDED_DICTIONARIES:
system_context.getEmbeddedDictionaries().reload();
break;
case Type::RELOAD_CONFIG:
context.reloadConfig();
system_context.reloadConfig();
break;
case Type::STOP_LISTEN_QUERIES:
case Type::START_LISTEN_QUERIES:

View File

@ -65,6 +65,7 @@ struct Settings
M(SettingFloat, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.") \
\
M(SettingBool, compile, false, "Whether query compilation is enabled.") \
M(SettingBool, compile_expressions, false, "Compile some scalar functions and operators to native code.") \
M(SettingUInt64, min_count_to_compile, 3, "The number of structurally identical queries before they are compiled.") \
M(SettingUInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.") \
M(SettingUInt64, group_by_two_level_threshold_bytes, 100000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.") \

Some files were not shown because too many files have changed in this diff Show More