Merge branch 'master' into test/crash_35551

This commit is contained in:
mergify[bot] 2022-04-04 03:59:10 +00:00 committed by GitHub
commit 4d9bb9f6fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
126 changed files with 2292 additions and 1047 deletions

View File

@ -16,7 +16,6 @@ Checks: '-*,
modernize-make-unique,
modernize-raw-string-literal,
modernize-redundant-void-arg,
modernize-replace-auto-ptr,
modernize-replace-random-shuffle,
modernize-use-bool-literals,
modernize-use-nullptr,

View File

@ -947,6 +947,34 @@ jobs:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
needs:
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no version info
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head
python3 docker_server.py --release-type head --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:

View File

@ -4,7 +4,7 @@ env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
on: # yamllint disable-line rule:truthy
pull_request:
types:
- synchronize
@ -998,6 +998,34 @@ jobs:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
needs:
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no version info
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push
python3 docker_server.py --release-type head --no-push --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
@ -3138,6 +3166,7 @@ jobs:
needs:
- StyleCheck
- DockerHubPush
- DockerServerImages
- CheckLabels
- BuilderReport
- FastTest

View File

@ -36,3 +36,28 @@ jobs:
overwrite: true
tag: ${{ github.ref }}
file_glob: true
############################################################################################
##################################### Docker images #######################################
############################################################################################
DockerServerImages:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no version info
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type auto
python3 docker_server.py --release-type auto --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"

View File

@ -1,12 +1,9 @@
# During cross-compilation in our CI, llvm-tblgen and other build tools have to be
# built for the host architecture and everything else for the target architecture (e.g. AArch64)
# Possible workaround is to use llvm-tblgen from some package...
# But lets just enable LLVM for native builds
if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined")
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
else()
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
endif()
option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER)

View File

@ -20,7 +20,7 @@ ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).

72
docker/keeper/Dockerfile Normal file
View File

@ -0,0 +1,72 @@
FROM ubuntu:20.04 AS glibc-donor
ARG TARGETARCH
# Create an architecture-independent alias /lib/linux-gnu pointing at the
# architecture-specific glibc directory (<rarch>-linux-gnu), so that later
# stages can COPY libraries from a fixed path.
# Fail fast on an unknown TARGETARCH instead of creating a dangling
# "-linux-gnu" symlink that only breaks later, at COPY time.
RUN arch=${TARGETARCH:-amd64} \
    && case $arch in \
        amd64) rarch=x86_64 ;; \
        arm64) rarch=aarch64 ;; \
        *) echo "Unsupported TARGETARCH: $arch" >&2 ; exit 1 ;; \
    esac \
    && ln -s "${rarch}-linux-gnu" /lib/linux-gnu
FROM alpine
ENV LANG=en_US.UTF-8 \
LANGUAGE=en_US:en \
LC_ALL=en_US.UTF-8 \
TZ=UTC \
CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
COPY entrypoint.sh /entrypoint.sh
# TARGETARCH has to be declared inside this build stage: an ARG declared in an
# earlier stage goes out of scope at the next FROM, so without this line the
# expression below would always fall back to amd64.
ARG TARGETARCH
# Make the glibc dynamic loader copied from the donor stage (/lib/ld-2.31.so)
# available under the loader path used on the target architecture.
RUN arch=${TARGETARCH:-amd64} \
    && case $arch in \
        amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
        arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
    esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="22.4.1.917"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the beginning of the Dockerfile, before any packages are
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \
&& adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse keeper" -u 101 clickhouse \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper \
&& chown clickhouse:clickhouse /var/lib/clickhouse \
&& chown root:clickhouse /var/log/clickhouse-keeper \
&& chmod +x /entrypoint.sh \
&& apk add --no-cache su-exec bash tzdata \
&& cp /usr/share/zoneinfo/UTC /etc/localtime \
&& echo "UTC" > /etc/timezone \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper
EXPOSE 2181 10181 44444
VOLUME /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -0,0 +1 @@
Dockerfile

View File

@ -0,0 +1,93 @@
#!/bin/bash
set +x
set -eo pipefail
shopt -s nullglob
# Ownership of the working directories is fixed up by default;
# setting CLICKHOUSE_DO_NOT_CHOWN=1 in the environment turns that off.
DO_CHOWN=1
if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then
DO_CHOWN=0
fi
# uid/gid to run the keeper as: taken from the environment when provided,
# otherwise looked up from the `clickhouse` account.
CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
# support --user
# Started as root: commands are later prefixed with gosu or su-exec (gosu is
# tried first) to switch to CLICKHOUSE_UID:CLICKHOUSE_GID. If neither tool is
# found the script aborts.
# Started as any other user: run as that user, with no prefix command and no chown.
if [ "$(id -u)" = "0" ]; then
USER=$CLICKHOUSE_UID
GROUP=$CLICKHOUSE_GID
if command -v gosu &> /dev/null; then
gosu="gosu $USER:$GROUP"
elif command -v su-exec &> /dev/null; then
gosu="su-exec $USER:$GROUP"
else
echo "No gosu/su-exec detected!"
exit 1
fi
else
USER="$(id -u)"
GROUP="$(id -g)"
gosu=""
DO_CHOWN=0
fi
# Path of the keeper config file; overridable through KEEPER_CONFIG.
# A missing file is not an error here, but an existing file that the target
# user cannot read is.
KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/config.yaml}"
if [ -f "$KEEPER_CONFIG" ] && ! $gosu test -f "$KEEPER_CONFIG" -a -r "$KEEPER_CONFIG"; then
echo "Configuration file '$KEEPER_CONFIG' isn't readable by user with id '$USER'"
exit 1
fi
# Data and log locations (overridable via CLICKHOUSE_DATA_DIR and LOG_DIR) and
# the paths derived from them.
DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"
LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}"
LOG_PATH="${LOG_DIR}/clickhouse-keeper.log"
ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log"
COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log"
COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots"
# Defaults to 0 unless set in the environment; exported later, just before the keeper is started.
CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0}
# Make sure every directory the keeper needs exists and is usable by the
# target user. TMP_DIR is not assigned by this script; it is only processed
# when it comes from the environment (an empty value is skipped below).
for dir in "$DATA_DIR" \
"$LOG_DIR" \
"$TMP_DIR" \
"$COORDINATION_LOG_DIR" \
"$COORDINATION_SNAPSHOT_DIR"
do
# skip entries whose variable is empty
[ -z "$dir" ] && continue
# ensure directories exist
if ! mkdir -p "$dir"; then
echo "Couldn't create necessary directory: $dir"
exit 1
fi
if [ "$DO_CHOWN" = "1" ]; then
# ensure proper directory ownership,
# but skip it if the directory already has the proper owner and group, because a recursive chown may be slow
if [ "$(stat -c %u "$dir")" != "$USER" ] || [ "$(stat -c %g "$dir")" != "$GROUP" ]; then
chown -R "$USER:$GROUP" "$dir"
fi
# chown is disabled: the directory must already be readable and writable by the target user
elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then
echo "Necessary directory '$dir' isn't accessible by user with id '$USER'"
exit 1
fi
done
# If no args are passed to `docker run`, or the first argument starts with `--`,
# the user is passing clickhouse-keeper arguments: start the keeper.
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
    # Watchdog is launched by default, but does not send SIGINT to the main process,
    # so the container can't be finished by ctrl+c
    export CLICKHOUSE_WATCHDOG_ENABLE

    # Run from the configured data directory instead of a hard-coded
    # /var/lib/clickhouse, so that a CLICKHOUSE_DATA_DIR override is honoured.
    # With the default settings this is the same directory as before.
    cd "$DATA_DIR"

    # There is a config file. It was already tested with gosu (readable by the keeper user)
    if [ -f "$KEEPER_CONFIG" ]; then
        exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@"
    fi

    # There is no config file. Will use the embedded one
    exec $gosu /usr/bin/clickhouse-keeper --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@"
fi
# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image
exec "$@"

View File

@ -1,2 +0,0 @@
alpine-root/*
tgz-packages/*

View File

@ -1,122 +0,0 @@
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ARG repository="deb https://packages.clickhouse.com/deb stable main"
ARG version=22.1.1.*
# set non-empty deb_location_url url to create a docker image
# from debs created by CI build, for example:
# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
ARG deb_location_url=""
# set non-empty single_binary_location_url to create docker image
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
# for example (run on aarch64 server):
# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
# note: clickhouse-odbc-bridge is not supported there.
ARG single_binary_location_url=""
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
ARG DEBIAN_FRONTEND=noninteractive
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the begining of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
# To drop privileges, we need 'su' command, that simply changes uid and gid.
# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux:
# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking
# It has to mitigate this drawback of Linux, and to do this, 'su' command is creating it's own pseudo-terminal
# and forwarding commands. Due to some ridiculous curcumstances, it does not work in Docker (or it does)
# and for these reasons people are using alternatives to the 'su' command in Docker,
# that don't mess with the terminal, don't care about closing the opened files, etc...
# but can only be safe to drop privileges inside Docker.
# The question - what implementation of 'su' command to use.
# It should be a simple script doing about just two syscalls.
# Some people tend to use 'gosu' tool that is written in Go.
# It is not used for several reasons:
# 1. Dependency on some foreign code in yet another programming language - does not sound alright.
# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners.
COPY su-exec.c /su-exec.c
RUN groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
&& apt-get update \
&& apt-get install --yes --no-install-recommends \
apt-transport-https \
ca-certificates \
dirmngr \
gnupg \
locales \
wget \
tzdata \
&& mkdir -p /etc/apt/sources.list.d \
&& apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \
&& echo $repository > /etc/apt/sources.list.d/clickhouse.list \
&& if [ -n "$deb_location_url" ]; then \
echo "installing from custom url with deb packages: $deb_location_url" \
rm -rf /tmp/clickhouse_debs \
&& mkdir -p /tmp/clickhouse_debs \
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-common-static_${version}_amd64.deb" -P /tmp/clickhouse_debs \
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-client_${version}_all.deb" -P /tmp/clickhouse_debs \
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-server_${version}_all.deb" -P /tmp/clickhouse_debs \
&& dpkg -i /tmp/clickhouse_debs/*.deb ; \
elif [ -n "$single_binary_location_url" ]; then \
echo "installing from single binary url: $single_binary_location_url" \
&& rm -rf /tmp/clickhouse_binary \
&& mkdir -p /tmp/clickhouse_binary \
&& wget --progress=bar:force:noscroll "$single_binary_location_url" -O /tmp/clickhouse_binary/clickhouse \
&& chmod +x /tmp/clickhouse_binary/clickhouse \
&& /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \
else \
echo "installing from repository: $repository" \
&& apt-get update \
&& apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
&& apt-get install --allow-unauthenticated --yes --no-install-recommends \
clickhouse-common-static=$version \
clickhouse-client=$version \
clickhouse-server=$version ; \
fi \
&& apt-get install -y --no-install-recommends tcc libc-dev && \
tcc /su-exec.c -o /bin/su-exec && \
chown root:root /bin/su-exec && \
chmod 0755 /bin/su-exec && \
rm /su-exec.c && \
apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \
&& clickhouse-local -q 'SELECT * FROM system.build_options' \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
/tmp/* \
&& apt-get clean \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# we need to allow "others" access to clickhouse folder, because docker container
# can be started with arbitrary uid (openshift usecase)
RUN locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
ENV TZ UTC
RUN mkdir /docker-entrypoint-initdb.d
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
EXPOSE 9000 8123 9009
VOLUME /var/lib/clickhouse
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
ENTRYPOINT ["/entrypoint.sh"]

1
docker/server/Dockerfile Symbolic link
View File

@ -0,0 +1 @@
Dockerfile.ubuntu

View File

@ -1,3 +1,14 @@
FROM ubuntu:20.04 AS glibc-donor
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& ln -s "${rarch}-linux-gnu" /lib/linux-gnu
FROM alpine
ENV LANG=en_US.UTF-8 \
@ -6,7 +17,24 @@ ENV LANG=en_US.UTF-8 \
TZ=UTC \
CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml
COPY alpine-root/ /
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
esac
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="20.9.3.45"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
@ -15,9 +43,23 @@ COPY alpine-root/ /
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
RUN addgroup -S -g 101 clickhouse \
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \
&& adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse server" -u 101 clickhouse \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server/config.d /etc/clickhouse-server/users.d /etc/clickhouse-client /docker-entrypoint-initdb.d \
&& chown clickhouse:clickhouse /var/lib/clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \

View File

@ -0,0 +1,128 @@
FROM ubuntu:20.04
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
ARG DEBIAN_FRONTEND=noninteractive
COPY su-exec.c /su-exec.c
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \
&& groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
&& apt-get update \
&& apt-get install --yes --no-install-recommends \
apt-transport-https \
ca-certificates \
dirmngr \
gnupg \
locales \
wget \
tzdata \
&& apt-get install -y --no-install-recommends tcc libc-dev && \
tcc /su-exec.c -o /bin/su-exec && \
chown root:root /bin/su-exec && \
chmod 0755 /bin/su-exec && \
rm /su-exec.c && \
apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \
&& apt-get clean
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION=22.1.1.*
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image
# from debs created by CI build, for example:
# docker build . --network host --build-arg VERSION="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
ARG deb_location_url=""
# set non-empty single_binary_location_url to create docker image
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
# for example (run on aarch64 server):
# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
# note: clickhouse-odbc-bridge is not supported there.
ARG single_binary_location_url=""
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the beginning of the Dockerfile, before any packages are
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
# To drop privileges, we need 'su' command, that simply changes uid and gid.
# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux:
# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking
# It has to mitigate this drawback of Linux, and to do this, 'su' command is creating its own pseudo-terminal
# and forwarding commands. Due to some ridiculous circumstances, it does not work in Docker (or it does)
# and for these reasons people are using alternatives to the 'su' command in Docker,
# that don't mess with the terminal, don't care about closing the opened files, etc...
# but can only be safe to drop privileges inside Docker.
# The question - what implementation of 'su' command to use.
# It should be a simple script doing about just two syscalls.
# Some people tend to use 'gosu' tool that is written in Go.
# It is not used for several reasons:
# 1. Dependency on some foreign code in yet another programming language - does not sound alright.
# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners.
ARG TARGETARCH
# Install ClickHouse from exactly one source, checked in this order:
#   1. deb_location_url           - .deb packages downloaded from a custom URL
#                                   (arch-specific name first, then the "_all" name)
#   2. single_binary_location_url - a single binary that installs itself
#   3. REPOSITORY                 - the apt repository (default)
# Afterwards run a query with clickhouse-local as a check of the installed
# binary, remove apt/debconf/tmp leftovers and open up the clickhouse
# directories.
RUN arch=${TARGETARCH:-amd64} \
    && if [ -n "${deb_location_url}" ]; then \
        echo "installing from custom url with deb packages: ${deb_location_url}" \
        && rm -rf /tmp/clickhouse_debs \
        && mkdir -p /tmp/clickhouse_debs \
        && for package in ${PACKAGES}; do \
            { wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_${arch}.deb" -P /tmp/clickhouse_debs || \
                wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_all.deb" -P /tmp/clickhouse_debs ; } \
            || exit 1 \
        ; done \
        && dpkg -i /tmp/clickhouse_debs/*.deb ; \
    elif [ -n "${single_binary_location_url}" ]; then \
        echo "installing from single binary url: ${single_binary_location_url}" \
        && rm -rf /tmp/clickhouse_binary \
        && mkdir -p /tmp/clickhouse_binary \
        && wget --progress=bar:force:noscroll "${single_binary_location_url}" -O /tmp/clickhouse_binary/clickhouse \
        && chmod +x /tmp/clickhouse_binary/clickhouse \
        && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \
    else \
        mkdir -p /etc/apt/sources.list.d \
        && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \
        && echo ${REPOSITORY} > /etc/apt/sources.list.d/clickhouse.list \
        && echo "installing from repository: ${REPOSITORY}" \
        && apt-get update \
        && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
        && for package in ${PACKAGES}; do \
            apt-get install --allow-unauthenticated --yes --no-install-recommends "${package}=${VERSION}" || exit 1 \
        ; done \
    ; fi \
    && clickhouse-local -q 'SELECT * FROM system.build_options' \
    && rm -rf \
        /var/lib/apt/lists/* \
        /var/cache/debconf \
        /tmp/* \
    && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
    && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# we need to allow "others" access to clickhouse folder, because docker container
# can be started with arbitrary uid (openshift usecase)
RUN locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
ENV TZ UTC
RUN mkdir /docker-entrypoint-initdb.d
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
EXPOSE 9000 8123 9009
VOLUME /var/lib/clickhouse
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -1,63 +0,0 @@
#!/bin/bash
set -x
REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc
REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}"
VERSION="${VERSION:-20.9.3.45}"
DOCKER_IMAGE="${DOCKER_IMAGE:-clickhouse/clickhouse-server}"
# where original files live
DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
# we will create root for our image here
CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root"
# clean up the root from old runs, it's reconstructed each time
rm -rf "$CONTAINER_ROOT_FOLDER"
mkdir -p "$CONTAINER_ROOT_FOLDER"
# where to put downloaded tgz
TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages"
mkdir -p "$TGZ_PACKAGES_FOLDER"
PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
# download tars from the repo
for package in "${PACKAGES[@]}"
do
wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
done
# unpack tars
for package in "${PACKAGES[@]}"
do
tar xvzf "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" --strip-components=2 -C "$CONTAINER_ROOT_FOLDER"
done
# prepare few more folders
mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \
"${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d" \
"${CONTAINER_ROOT_FOLDER}/var/log/clickhouse-server" \
"${CONTAINER_ROOT_FOLDER}/var/lib/clickhouse" \
"${CONTAINER_ROOT_FOLDER}/docker-entrypoint-initdb.d" \
"${CONTAINER_ROOT_FOLDER}/lib64"
cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/"
cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
## get glibc components from ubuntu 20.04 and put them to expected place
docker pull ubuntu:20.04
ubuntu20image=$(docker create --rm ubuntu:20.04)
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc"
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
rm -rf "$CONTAINER_ROOT_FOLDER"

View File

@ -1,47 +0,0 @@
# Since right now we can't set volumes to the docker during build, we split building container in stages:
# 1. build base container
# 2. run base conatiner with mounted volumes
# 3. commit container as image
# 4. build final container atop that image
# Middle steps are performed by the bash script.
FROM ubuntu:18.04 as clickhouse-server-base
ARG gosu_ver=1.14
VOLUME /packages/
# update to allow installing dependencies of clickhouse automatically
RUN apt update; \
DEBIAN_FRONTEND=noninteractive \
apt install -y locales;
ADD https://github.com/tianon/gosu/releases/download/${gosu_ver}/gosu-amd64 /bin/gosu
RUN locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
# installing via apt to simulate real-world scenario, where user installs deb package and all it's dependecies automatically.
CMD DEBIAN_FRONTEND=noninteractive \
apt install -y \
/packages/clickhouse-common-static_*.deb \
/packages/clickhouse-server_*.deb ;
FROM clickhouse-server-base:postinstall as clickhouse-server
RUN mkdir /docker-entrypoint-initdb.d
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x \
/entrypoint.sh \
/bin/gosu
EXPOSE 9000 8123 9009
VOLUME /var/lib/clickhouse
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -1,86 +0,0 @@
#!/bin/sh
# Builds the local test-runner image and, when no server image is given, a local server image,
# then runs the test-runner service through docker-compose.
# Arguments:
#   $1 - path to the docker directory (resolved with realpath)
#   $2 - path to the packages directory, or --no-rebuild to skip all image builds
#   $3 - server image to run against; when empty, "clickhouse/server:local" is used
set -e -x
# Not sure why shellcheck complains that rc is not assigned before it is referenced.
# shellcheck disable=SC2154
trap 'rc=$?; echo EXITED WITH: $rc; exit $rc' EXIT
# CLI option to prevent rebuilding images, just re-run tests with images left over from the previous time
readonly NO_REBUILD_FLAG="--no-rebuild"
readonly CLICKHOUSE_DOCKER_DIR="$(realpath "${1}")"
readonly CLICKHOUSE_PACKAGES_ARG="${2}"
CLICKHOUSE_SERVER_IMAGE="${3}"
# The packages directory is only resolved when $2 is an actual path rather than the flag.
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
readonly CLICKHOUSE_PACKAGES_DIR="$(realpath "${2}")" # or --no-rebuild
fi
# In order to allow packages directory to be anywhere, and to reduce amount of context sent to the docker daemon,
# all images are built in multiple stages:
# 1. build base image, install dependencies
# 2. run image with volume mounted, install what needed from those volumes
# 3. tag container as image
# 4. [optional] build another image atop of tagged.
# TODO: optionally mount most recent clickhouse-test and queries directory from local machine
# Test-runner image: build the base stage, run it with the packages mounted, commit the result.
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
docker build --network=host \
-f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \
--target clickhouse-test-runner-base \
-t clickhouse-test-runner-base:preinstall \
"${CLICKHOUSE_DOCKER_DIR}/test/stateless"
docker rm -f clickhouse-test-runner-installing-packages || true
docker run --network=host \
-v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \
--name clickhouse-test-runner-installing-packages \
clickhouse-test-runner-base:preinstall
docker commit clickhouse-test-runner-installing-packages clickhouse-statelest-test-runner:local
docker rm -f clickhouse-test-runner-installing-packages || true
fi
# # Create a bind-volume to the clickhouse-test script file
# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/clickhouse-test --opt o=bind clickhouse-test-script-volume
# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/queries --opt o=bind clickhouse-test-queries-dir-volume
# Build server image (optional) from local packages
if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then
CLICKHOUSE_SERVER_IMAGE="clickhouse/server:local"
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
docker build --network=host \
-f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \
--target clickhouse-server-base \
-t clickhouse-server-base:preinstall \
"${CLICKHOUSE_DOCKER_DIR}/server"
docker rm -f clickhouse_server_base_installing_server || true
docker run --network=host -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \
--name clickhouse_server_base_installing_server \
clickhouse-server-base:preinstall
# The committed :postinstall tag is the base of the final stage built next.
docker commit clickhouse_server_base_installing_server clickhouse-server-base:postinstall
docker build --network=host \
-f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \
--target clickhouse-server \
-t "${CLICKHOUSE_SERVER_IMAGE}" \
"${CLICKHOUSE_DOCKER_DIR}/server"
fi
fi
docker rm -f test-runner || true
# NOTE(review): unlike the docker-compose calls below, this one passes no -f, so it relies on
# docker-compose's default compose-file lookup - confirm this is intended.
docker-compose down
CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \
docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \
create \
--build --force-recreate
CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \
docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \
run \
--name test-runner \
test-runner

View File

@ -1,34 +0,0 @@
# Compose file with two services: the ClickHouse server under test and the test runner that depends on it.
version: "2"
services:
clickhouse-server:
# Image name is taken from the CLICKHOUSE_SERVER_IMAGE environment variable.
image: ${CLICKHOUSE_SERVER_IMAGE}
expose:
- "8123" # HTTP
- "9000" # TCP
- "9009" # HTTP-interserver
restart: "no"
test-runner:
image: clickhouse-statelest-test-runner:local
restart: "no"
depends_on:
- clickhouse-server
environment:
# these are used by clickhouse-test to point clickhouse-client to the right server
- CLICKHOUSE_HOST=clickhouse-server
# NOTE(review): 9009 is labelled HTTP-interserver in the clickhouse-server service, while 9000 is
# labelled TCP - confirm which port clickhouse-client is expected to use here.
- CLICKHOUSE_PORT=9009
- CLICKHOUSE_TEST_HOST_EXPOSED_PORT=51234
expose:
# port for any test to serve data to clickhouse-server on rare occasion (like URL-engine tables in 00646),
# should match value of CLICKHOUSE_TEST_HOST_EXPOSED_PORT above
- "51234"
# NOTE: Dev-mode: mount newest versions of the queries and clickhouse-test script into container.
# volumes:
# - /home/enmk/proj/ClickHouse_master/tests/queries:/usr/share/clickhouse-test/queries:ro
# - /home/enmk/proj/ClickHouse_master/tests/clickhouse-test:/usr/bin/clickhouse-test:ro
# String-form instead of list-form to allow multiple arguments in "${CLICKHOUSE_TEST_ARGS}"
entrypoint: "clickhouse-test ${CLICKHOUSE_TEST_ARGS}"

View File

@ -36,6 +36,7 @@ Example of configuration:
<access_key_id>AKIAIOSFODNN7EXAMPLE</access_key_id>
<secret_access_key> wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY</secret_access_key>
<format>CSV</format>
<url>https://s3.us-east-1.amazonaws.com/yourbucket/mydata/</url>
</s3_mydata>
</named_collections>
</clickhouse>
@ -44,12 +45,12 @@ Example of configuration:
### Example of using named connections with the s3 function
```sql
INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz',
INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz',
format = 'TSV', structure = 'number UInt64', compression_method = 'gzip')
SELECT * FROM numbers(10000);
SELECT count()
FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz')
FROM s3(s3_mydata, filename = 'test_file.tsv.gz')
┌─count()─┐
│ 10000 │

View File

@ -114,9 +114,9 @@ In addition, this column is not substituted when using an asterisk in a SELECT q
### EPHEMERAL {#ephemeral}
`EPHEMERAL expr`
`EPHEMERAL [expr]`
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement.
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of the CREATE statement. If `expr` is omitted, the column type must be specified explicitly.
INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
### ALIAS {#alias}

View File

@ -110,9 +110,9 @@ SELECT x, toTypeName(x) FROM t1;
### EPHEMERAL {#ephemeral}
`EPHEMERAL expr`
`EPHEMERAL [expr]`
Эфемерное выражение. Такой столбец не хранится в таблице и не может быть получен в запросе SELECT, но на него можно ссылаться в выражениях по умолчанию запроса CREATE.
Эфемерное выражение. Такой столбец не хранится в таблице и не может быть получен в запросе SELECT, но на него можно ссылаться в выражениях по умолчанию запроса CREATE. Если значение по умолчанию `expr` не указано, то тип колонки должен быть специфицирован.
INSERT без списка столбцов игнорирует этот столбец, таким образом сохраняется инвариант - т.е. дамп, полученный путём `SELECT *`, можно вставить обратно в таблицу INSERT-ом без указания списка столбцов.
### ALIAS {#alias}

View File

@ -16,7 +16,7 @@ jsmin==3.0.0
livereload==2.6.3
Markdown==3.3.2
MarkupSafe==2.1.0
mkdocs==1.1.2
mkdocs==1.3.0
mkdocs-htmlproofer-plugin==0.0.3
mkdocs-macros-plugin==0.4.20
nltk==3.7

View File

@ -20,7 +20,7 @@ ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).

View File

@ -148,13 +148,13 @@
<!-- <interserver_https_port>9010</interserver_https_port> -->
<!-- Hostname that is used by other replicas to request this server.
If not specified, than it is determined analogous to 'hostname -f' command.
If not specified, then it is determined analogous to 'hostname -f' command.
This setting could be used to switch replication to another network interface
(the server may be connected to multiple networks via multiple addresses)
-->
<!--
<interserver_http_host>example.yandex.ru</interserver_http_host>
<interserver_http_host>example.clickhouse.com</interserver_http_host>
-->
<!-- You can specify credentials for authentication between replicas.
@ -765,14 +765,14 @@
-->
<!--<remote_url_allow_hosts>-->
<!-- Host should be specified exactly as in URL. The name is checked before DNS resolution.
Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
Example: "clickhouse.com", "clickhouse.com." and "www.clickhouse.com" are different hosts.
If port is explicitly specified in URL, the host:port is checked as a whole.
If host specified here without port, any port with this host allowed.
"yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
"clickhouse.com" -> "clickhouse.com:443", "clickhouse.com:80" etc. is allowed, but "clickhouse.com:80" -> only "clickhouse.com:80" is allowed.
If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
Host should be specified using the host xml tag:
<host>yandex.ru</host>
<host>clickhouse.com</host>
-->
<!-- Regular expression can be specified. RE2 engine is used for regexps.
@ -1030,25 +1030,17 @@
<flush_interval_milliseconds>1000</flush_interval_milliseconds>
</crash_log>
<!-- Session log. Stores user log in (successful or not) and log out events. -->
<session_log>
<!-- Session log. Stores user log in (successful or not) and log out events.
Note: session log has known security issues and should not be used in production.
-->
<!-- <session_log>
<database>system</database>
<table>session_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</session_log>
<!-- Parameters for embedded dictionaries, used in Yandex.Metrica.
See https://clickhouse.com/docs/en/dicts/internal_dicts/
-->
<!-- Path to file with region hierarchy. -->
<!-- <path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file> -->
<!-- Path to directory with files containing names of regions -->
<!-- <path_to_regions_names_files>/opt/geo/</path_to_regions_names_files> -->
</session_log> -->
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
<!-- Custom TLD lists.

View File

@ -103,7 +103,7 @@ interserver_http_port: 9009
# If not specified, then it is determined analogously to the 'hostname -f' command.
# This setting could be used to switch replication to another network interface
# (the server may be connected to multiple networks via multiple addresses)
# interserver_http_host: example.yandex.ru
# interserver_http_host: example.clickhouse.com
# You can specify credentials for authentication between replicas.
# This is required when interserver_https_port is accessible from untrusted networks,
@ -592,10 +592,10 @@ remote_servers:
# remote_url_allow_hosts:
# Host should be specified exactly as in URL. The name is checked before DNS resolution.
# Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
# Example: "clickhouse.com", "clickhouse.com." and "www.clickhouse.com" are different hosts.
# If port is explicitly specified in URL, the host:port is checked as a whole.
# If host specified here without port, any port with this host allowed.
# "yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
# "clickhouse.com" -> "clickhouse.com:443", "clickhouse.com:80" etc. is allowed, but "clickhouse.com:80" -> only "clickhouse.com:80" is allowed.
# If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
# If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
@ -803,16 +803,6 @@ crash_log:
partition_by: ''
flush_interval_milliseconds: 1000
# Parameters for embedded dictionaries, used in Yandex.Metrica.
# See https://clickhouse.com/docs/en/dicts/internal_dicts/
# Path to file with region hierarchy.
# path_to_regions_hierarchy_file: /opt/geo/regions_hierarchy.txt
# Path to directory with files containing names of regions
# path_to_regions_names_files: /opt/geo/
# top_level_domains_path: /var/lib/clickhouse/top_level_domains/
# Custom TLD lists.
# Format: name: /path/to/file

View File

@ -266,12 +266,25 @@
color: var(--null-color);
}
/* Hourglass flip: the element stays rotated by -180deg from 0% to 50% of each cycle,
   then goes from that rotation to no transform between 50% and 100%. */
@keyframes hourglass-animation {
0% {
transform: rotate(-180deg);
}
50% {
transform: rotate(-180deg);
}
100% {
transform: none;
}
}
#hourglass
{
display: none;
padding-left: 1rem;
margin-left: 1rem;
font-size: 110%;
color: #888;
animation: hourglass-animation 1s linear infinite;
}
#check-mark
@ -457,7 +470,7 @@
}
document.getElementById('check-mark').style.display = 'none';
document.getElementById('hourglass').style.display = 'inline';
document.getElementById('hourglass').style.display = 'inline-block';
xhr.send(query);
}

View File

@ -79,9 +79,9 @@
Each element of list has one of the following forms:
<ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
<host> Hostname. Example: server01.yandex.ru.
<host> Hostname. Example: server01.clickhouse.com.
To check access, DNS query is performed, and all received addresses compared to peer address.
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
To check access, DNS PTR query is performed for peer address and then regexp is applied.
Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
It is strongly recommended that the regexp ends with $

View File

@ -70,9 +70,9 @@ users:
# Each element of list has one of the following forms:
# ip: IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
# 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
# host: Hostname. Example: server01.yandex.ru.
# host: Hostname. Example: server01.clickhouse.com.
# To check access, DNS query is performed, and all received addresses compared to peer address.
# host_regexp: Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$
# host_regexp: Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
# To check access, DNS PTR query is performed for peer address and then regexp is applied.
# Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
# It is strongly recommended that the regexp ends with $ and that the whole expression is enclosed in ''

View File

@ -182,6 +182,7 @@ enum class AccessType
M(JDBC, "", GLOBAL, SOURCES) \
M(HDFS, "", GLOBAL, SOURCES) \
M(S3, "", GLOBAL, SOURCES) \
M(HIVE, "", GLOBAL, SOURCES) \
M(SOURCES, "", GROUP, ALL) \
\
M(ALL, "ALL PRIVILEGES", GROUP, NONE) /* full access */ \

View File

@ -107,6 +107,11 @@ const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type)
static const auto info = make_info("EXECUTION_TIME", 1000000000 /* execution_time is stored in nanoseconds */);
return info;
}
case QuotaType::WRITTEN_BYTES:
{
static const auto info = make_info("WRITTEN_BYTES", 1);
return info;
}
case QuotaType::MAX: break;
}
throw Exception("Unexpected quota type: " + std::to_string(static_cast<int>(type)), ErrorCodes::LOGICAL_ERROR);

View File

@ -20,6 +20,7 @@ enum class QuotaType
READ_ROWS, /// Number of rows read from tables.
READ_BYTES, /// Number of bytes read from tables.
EXECUTION_TIME, /// Total amount of query execution time in nanoseconds.
WRITTEN_BYTES, /// Number of bytes written to tables.
MAX
};

View File

@ -13,7 +13,7 @@ namespace DB
{
namespace ErrorCodes
{
extern const int QUOTA_EXPIRED;
extern const int QUOTA_EXCEEDED;
}
@ -33,7 +33,7 @@ struct EnabledQuota::Impl
"Quota for user " + backQuote(user_name) + " for " + to_string(duration) + " has been exceeded: "
+ type_info.valueToStringWithName(used) + "/" + type_info.valueToString(max) + ". "
+ "Interval will end at " + to_string(end_of_interval) + ". " + "Name of quota template: " + backQuote(quota_name),
ErrorCodes::QUOTA_EXPIRED);
ErrorCodes::QUOTA_EXCEEDED);
}

View File

@ -208,7 +208,7 @@
M(198, DNS_ERROR) \
M(199, UNKNOWN_QUOTA) \
M(200, QUOTA_DOESNT_ALLOW_KEYS) \
M(201, QUOTA_EXPIRED) \
M(201, QUOTA_EXCEEDED) \
M(202, TOO_MANY_SIMULTANEOUS_QUERIES) \
M(203, NO_FREE_CONNECTION) \
M(204, CANNOT_FSYNC) \

View File

@ -241,6 +241,10 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t
std::lock_guard cache_lock(mutex);
#ifndef NDEBUG
assertCacheCorrectness(key, cache_lock);
#endif
/// Get all segments which intersect with the given range.
auto file_segments = getImpl(key, range, cache_lock);
@ -315,7 +319,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t
LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
const Key & key, size_t offset, size_t size, FileSegment::State state,
std::lock_guard<std::mutex> & /* cache_lock */)
std::lock_guard<std::mutex> & cache_lock)
{
/// Create a file segment cell and put it in `files` map by [key][offset].
@ -323,8 +327,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
return nullptr; /// Empty files are not cached.
if (files[key].contains(offset))
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache already exists for key: `{}`, offset: {}, size: {}", keyToStr(key), offset, size);
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}",
keyToStr(key), offset, size, dumpStructureImpl(key, cache_lock));
auto file_segment = std::make_shared<FileSegment>(offset, size, key, this, state);
FileSegmentCell cell(std::move(file_segment), queue);
@ -340,8 +346,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
auto [it, inserted] = offsets.insert({offset, std::move(cell)});
if (!inserted)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Failed to insert into cache key: `{}`, offset: {}, size: {}", keyToStr(key), offset, size);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Failed to insert into cache key: `{}`, offset: {}, size: {}",
keyToStr(key), offset, size);
return &(it->second);
}
@ -523,8 +531,8 @@ void LRUFileCache::loadCacheInfoIntoMemory()
std::lock_guard cache_lock(mutex);
Key key;
UInt64 offset;
size_t size;
UInt64 offset = 0;
size_t size = 0;
std::vector<FileSegmentCell *> cells;
/// cache_base_path / key_prefix / key / offset
@ -687,22 +695,32 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
}
}
String LRUFileCache::dumpStructure(const Key & key_)
String LRUFileCache::dumpStructure(const Key & key)
{
std::lock_guard cache_lock(mutex);
return dumpStructureImpl(key, cache_lock);
}
String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
{
WriteBufferFromOwnString result;
for (auto it = queue.begin(); it != queue.end(); ++it)
{
auto [key, offset] = *it;
if (key == key_)
{
auto * cell = getCell(key, offset, cache_lock);
result << (it != queue.begin() ? ", " : "") << cell->file_segment->range().toString();
result << "(state: " << cell->file_segment->download_state << ")";
}
}
const auto & cells_by_offset = files[key];
for (const auto & [offset, cell] : cells_by_offset)
result << cell.file_segment->getInfoForLog() << "\n";
return result.str();
}
/// Runs FileSegment::assertCorrectness() on every file segment currently stored for `key`.
/// The caller passes its cache lock guard as `cache_lock`; the parameter is not used in the body.
/// Does nothing when the cache has no entry for `key`.
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
{
    /// Use find() instead of operator[]: operator[] would default-insert an empty entry
    /// for an unknown key, so a pure consistency check would modify the cache contents.
    auto cells_it = files.find(key);
    if (cells_it == files.end())
        return;

    for (const auto & [_, cell] : cells_it->second)
        cell.file_segment->assertCorrectness();
}
}

View File

@ -25,6 +25,7 @@ namespace DB
class IFileCache : private boost::noncopyable
{
friend class FileSegment;
friend struct FileSegmentsHolder;
public:
using Key = UInt128;
@ -196,6 +197,8 @@ private:
FileSegments splitRangeIntoEmptyCells(
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
public:
struct Stat
{
@ -208,6 +211,7 @@ public:
Stat getStat();
String dumpStructure(const Key & key_) override;
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
};
}

View File

@ -159,7 +159,18 @@ void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_)
remote_file_reader = remote_file_reader_;
}
void FileSegment::write(const char * from, size_t size)
/// Drops the remote file reader held by this segment.
/// Throws REMOTE_FS_OBJECT_CACHE_ERROR if the caller is not the downloader,
/// and LOGICAL_ERROR if no reader is currently set.
void FileSegment::resetRemoteFileReader()
{
/// The downloader check comes first, so a non-downloader always gets the cache error.
if (!isDownloader())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Only downloader can use remote filesystem file reader");
if (!remote_file_reader)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Remote file reader does not exist");
remote_file_reader.reset();
}
void FileSegment::write(const char * from, size_t size, size_t offset_)
{
if (!size)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed");
@ -174,8 +185,24 @@ void FileSegment::write(const char * from, size_t size)
"Only downloader can do the downloading. (CallerId: {}, DownloaderId: {})",
getCallerId(), downloader_id);
if (downloaded_size == range().size())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Attempt to write {} bytes to offset: {}, but current file segment is already fully downloaded",
size, offset_);
auto download_offset = range().left + downloaded_size;
if (offset_ != download_offset)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Attempt to write {} bytes to offset: {}, but current download offset is {}",
size, offset_, download_offset);
if (!cache_writer)
{
if (downloaded_size > 0)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cache writer was finalized (downloaded size: {}, state: {})",
downloaded_size, stateToString(download_state));
auto download_path = cache->getPathInLocalCache(key(), offset());
cache_writer = std::make_unique<WriteBufferFromFile>(download_path);
}
@ -190,19 +217,26 @@ void FileSegment::write(const char * from, size_t size)
downloaded_size += size;
}
catch (...)
catch (Exception & e)
{
std::lock_guard segment_lock(mutex);
LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLogImpl(segment_lock));
auto info = getInfoForLogImpl(segment_lock);
e.addMessage("while writing into cache, info: " + info);
LOG_ERROR(log, "Failed to write to cache. File segment info: {}", info);
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
cache_writer->finalize();
cache_writer.reset();
cv.notify_all();
throw;
}
assert(getDownloadOffset() == offset_ + size);
}
FileSegment::State FileSegment::wait()
@ -270,7 +304,6 @@ void FileSegment::setDownloaded(std::lock_guard<std::mutex> & /* segment_lock */
download_state = State::DOWNLOADED;
is_downloaded = true;
assert(cache_writer);
if (cache_writer)
{
cache_writer->finalize();
@ -299,107 +332,125 @@ void FileSegment::completeBatchAndResetDownloader()
void FileSegment::complete(State state)
{
{
std::lock_guard segment_lock(mutex);
bool is_downloader = downloader_id == getCallerId();
if (!is_downloader)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"File segment can be completed only by downloader or downloader's FileSegmentsHodler");
}
if (state != State::DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cannot complete file segment with state: {}", stateToString(state));
}
download_state = state;
}
completeImpl();
cv.notify_all();
}
void FileSegment::complete()
{
{
std::lock_guard segment_lock(mutex);
if (download_state == State::SKIP_CACHE || detached)
return;
if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size())
setDownloaded(segment_lock);
if (download_state == State::DOWNLOADING || download_state == State::EMPTY)
download_state = State::PARTIALLY_DOWNLOADED;
}
completeImpl(true);
cv.notify_all();
}
void FileSegment::completeImpl(bool allow_non_strict_checking)
{
/// cache lock is always taken before segment lock.
std::lock_guard cache_lock(cache->mutex);
std::lock_guard segment_lock(mutex);
bool download_can_continue = false;
if (download_state == State::PARTIALLY_DOWNLOADED
|| download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)
bool is_downloader = downloader_id == getCallerId();
if (!is_downloader)
{
bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
download_can_continue = !is_last_holder && download_state == State::PARTIALLY_DOWNLOADED;
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"File segment can be completed only by downloader or downloader's FileSegmentsHodler");
}
if (!download_can_continue)
if (state != State::DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cannot complete file segment with state: {}", stateToString(state));
}
download_state = state;
try
{
completeImpl(cache_lock, segment_lock);
}
catch (...)
{
if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true))
downloader_id.clear();
cv.notify_all();
throw;
}
cv.notify_all();
}
void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
{
std::lock_guard segment_lock(mutex);
if (download_state == State::SKIP_CACHE || detached)
return;
if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size())
setDownloaded(segment_lock);
if (download_state == State::DOWNLOADING || download_state == State::EMPTY)
{
/// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the
/// downloader or the only owner of the segment.
bool can_update_segment_state = downloader_id == getCallerIdImpl(true)
|| cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
if (can_update_segment_state)
download_state = State::PARTIALLY_DOWNLOADED;
}
try
{
completeImpl(cache_lock, segment_lock, /* allow_non_strict_checking */true);
}
catch (...)
{
if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true))
downloader_id.clear();
cv.notify_all();
throw;
}
cv.notify_all();
}
void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock, bool allow_non_strict_checking)
{
bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
if (is_last_holder
&& (download_state == State::PARTIALLY_DOWNLOADED || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION))
{
size_t current_downloaded_size = getDownloadedSize(segment_lock);
if (current_downloaded_size == 0)
{
size_t current_downloaded_size = getDownloadedSize(segment_lock);
if (current_downloaded_size == 0)
{
download_state = State::SKIP_CACHE;
LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString());
cache->remove(key(), offset(), cache_lock, segment_lock);
download_state = State::SKIP_CACHE;
LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString());
cache->remove(key(), offset(), cache_lock, segment_lock);
}
else
{
/**
* Only last holder of current file segment can resize the cell,
* because there is an invariant that file segments returned to users
* in FileSegmentsHolder represent a contiguous range, so we can resize
* it only when nobody needs it.
*/
LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size);
cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock);
}
detached = true;
}
else if (is_last_holder)
{
/**
* Only last holder of current file segment can resize the cell,
* because there is an invariant that file segments returned to users
* in FileSegmentsHolder represent a contiguous range, so we can resize
* it only when nobody needs it.
*/
LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size);
cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock);
detached = true;
detached = true;
}
if (cache_writer)
{
cache_writer->finalize();
cache_writer.reset();
remote_file_reader.reset();
}
}
if (!downloader_id.empty() && downloader_id == getCallerIdImpl(allow_non_strict_checking))
if (!downloader_id.empty() && (downloader_id == getCallerIdImpl(allow_non_strict_checking) || is_last_holder))
{
LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state));
downloader_id.clear();
}
if (!download_can_continue && cache_writer)
{
cache_writer->finalize();
cache_writer.reset();
remote_file_reader.reset();
}
assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0);
assertCorrectnessImpl(segment_lock);
}
String FileSegment::getInfoForLog() const
@ -440,6 +491,53 @@ String FileSegment::stateToString(FileSegment::State state)
__builtin_unreachable();
}
/// Locks the segment mutex and runs the invariant checks in assertCorrectnessImpl().
void FileSegment::assertCorrectness() const
{
std::lock_guard segment_lock(mutex);
assertCorrectnessImpl(segment_lock);
}
/// Invariant checks for a file segment. The caller passes its segment lock guard;
/// the parameter is not used in the body.
/// All checks are plain assert() calls.
void FileSegment::assertCorrectnessImpl(std::lock_guard<std::mutex> & /* segment_lock */) const
{
/// A downloader id is set if and only if the segment is in the DOWNLOADING state.
assert(downloader_id.empty() == (download_state != FileSegment::State::DOWNLOADING));
/// Same condition as above, written in negated form.
assert(!downloader_id.empty() == (download_state == FileSegment::State::DOWNLOADING));
/// A DOWNLOADED segment must have a non-empty file at its local cache path.
assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0);
}
/// Completes every file segment still held by this holder and removes it from `file_segments`.
/// Exceptions from complete() are logged and never leave the destructor.
FileSegmentsHolder::~FileSegmentsHolder()
{
    /// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from
    /// FileSegmentsHolder right after calling file_segment->complete(), so on destruction here
    /// remain only uncompleted file segments.

    /// Taken from the first segment and reused for the rest of the loop.
    IFileCache * cache = nullptr;

    for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();)
    {
        auto current_file_segment_it = file_segment_it;
        auto & file_segment = *current_file_segment_it;

        if (!cache)
            cache = file_segment->cache;

        try
        {
            /// File segment pointer must be reset right after calling complete() and
            /// under the same mutex, because complete() checks for segment pointers.
            std::lock_guard cache_lock(cache->mutex);

            file_segment->complete(cache_lock);

            file_segment_it = file_segments.erase(current_file_segment_it);
        }
        catch (...)
        {
            tryLogCurrentException(__PRETTY_FUNCTION__);
            assert(false);

            /// When assertions are compiled out, execution continues here. Nothing was erased,
            /// so the iterator still points at the failed segment: step past it, otherwise the
            /// loop would retry the same segment forever. The segment stays in the list and is
            /// released together with it.
            ++file_segment_it;
        }
    }
}
String FileSegmentsHolder::toString()
{
String ranges;

View File

@ -95,12 +95,14 @@ public:
bool reserve(size_t size);
void write(const char * from, size_t size);
void write(const char * from, size_t size, size_t offset_);
RemoteFileReaderPtr getRemoteFileReader();
void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);
void resetRemoteFileReader();
String getOrSetDownloader();
String getDownloader() const;
@ -121,16 +123,32 @@ public:
String getInfoForLog() const;
void assertCorrectness() const;
private:
size_t availableSize() const { return reserved_size - downloaded_size; }
bool lastFileSegmentHolder() const;
void complete();
void completeImpl(bool allow_non_strict_checking = false);
void setDownloaded(std::lock_guard<std::mutex> & segment_lock);
static String getCallerIdImpl(bool allow_non_strict_checking = false);
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
size_t getDownloadedSize(std::lock_guard<std::mutex> & segment_lock) const;
String getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock) const;
void assertCorrectnessImpl(std::lock_guard<std::mutex> & segment_lock) const;
void setDownloaded(std::lock_guard<std::mutex> & segment_lock);
bool lastFileSegmentHolder() const;
/// complete() without any completion state is called from destructor of
/// FileSegmentsHolder. complete() might check if the caller of the method
/// is the last alive holder of the segment. Therefore, complete() and destruction
/// of the file segment pointer must be done under the same cache mutex.
void complete(std::lock_guard<std::mutex> & cache_lock);
void completeImpl(
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock, bool allow_non_strict_checking = false);
static String getCallerIdImpl(bool allow_non_strict_checking = false);
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
const Range segment_range;
@ -169,28 +187,7 @@ struct FileSegmentsHolder : private boost::noncopyable
explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}
FileSegmentsHolder(FileSegmentsHolder && other) : file_segments(std::move(other.file_segments)) {}
~FileSegmentsHolder()
{
/// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from
/// FileSegmentsHolder right after calling file_segment->complete(), so on destruction here
/// remain only uncompleted file segments.
for (auto & segment : file_segments)
{
try
{
segment->complete();
}
catch (...)
{
#ifndef NDEBUG
throw;
#else
tryLogCurrentException(__PRETTY_FUNCTION__);
#endif
}
}
}
~FileSegmentsHolder();
FileSegments file_segments{};

View File

@ -67,7 +67,7 @@ void download(DB::FileSegmentPtr file_segment)
fs::create_directories(subdir);
std::string data(size, '0');
file_segment->write(data.data(), size);
file_segment->write(data.data(), size, file_segment->getDownloadOffset());
}
void prepareAndDownload(DB::FileSegmentPtr file_segment)

View File

@ -63,7 +63,11 @@ void CachedReadBufferFromRemoteFS::initialize(size_t offset, size_t size)
SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getCacheReadBuffer(size_t offset) const
{
return std::make_shared<ReadBufferFromFile>(cache->getPathInLocalCache(cache_key, offset), settings.local_fs_buffer_size);
/// Resolve the local cache file for (cache_key, offset) and open a file reader over it.
auto path = cache->getPathInLocalCache(cache_key, offset);
auto buf = std::make_shared<ReadBufferFromFile>(path, settings.local_fs_buffer_size);
/// An empty cache file is reported as a logical error here, at open time,
/// with the offending path included in the message.
if (buf->size() == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {}", path);
return buf;
}
SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_)
@ -96,7 +100,6 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSe
remote_fs_segment_reader = remote_file_reader_creator();
file_segment->setRemoteFileReader(remote_fs_segment_reader);
///TODO: add check for pending data
return remote_fs_segment_reader;
}
case ReadType::REMOTE_FS_READ_BYPASS_CACHE:
@ -119,7 +122,6 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(
{
auto range = file_segment->range();
/// Each wait() call has a timeout of 1 second.
size_t wait_download_max_tries = settings.remote_fs_cache_max_wait_sec;
size_t wait_download_tries = 0;
@ -296,17 +298,21 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
{
case ReadType::CACHED:
{
#ifndef NDEBUG
auto * file_reader = assert_cast<ReadBufferFromFile *>(read_buffer_for_file_segment.get());
size_t file_size = file_reader->size();
if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Unexpected state of cache file. Cache file size: {}, cache file offset: {}, "
"expected file size to be non-zero and file downloaded size to exceed current file read offset (expected: {} > {})",
file_size, range.left, range.left + file_size, file_offset_of_buffer_end);
#endif
size_t seek_offset = file_offset_of_buffer_end - range.left;
read_buffer_for_file_segment->seek(seek_offset, SEEK_SET);
auto * file_reader = assert_cast<ReadBufferFromFile *>(read_buffer_for_file_segment.get());
size_t file_size = file_reader->size();
auto state = file_segment->state();
LOG_TEST(log, "Cache file: {}. Cached seek to: {}, file size: {}, file segment state: {}, download offset: {}",
file_reader->getFileName(), seek_offset, file_size, state, file_segment->getDownloadOffset());
assert(file_size > 0);
break;
}
case ReadType::REMOTE_FS_READ_BYPASS_CACHE:
@ -384,6 +390,7 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId());
assert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment->getDownloadOffset());
size_t current_offset = file_segment->getDownloadOffset();
while (true)
{
@ -423,7 +430,11 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
{
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, implementation_buffer->buffer().size());
file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size);
assert(file_segment->getDownloadOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset);
current_offset += current_predownload_size;
bytes_to_predownload -= current_predownload_size;
implementation_buffer->position() += current_predownload_size;
@ -537,13 +548,15 @@ bool CachedReadBufferFromRemoteFS::nextImpl()
}
catch (Exception & e)
{
e.addMessage("Cache info: {}", getInfoForLog());
e.addMessage("Cache info: {}", nextimpl_step_log_info);
throw;
}
}
bool CachedReadBufferFromRemoteFS::nextImplStep()
{
last_caller_id = FileSegment::getCallerId();
if (IFileCache::shouldBypassCache())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Using cache when not allowed");
@ -554,6 +567,9 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
return false;
SCOPE_EXIT({
/// Save state of current file segment before it is completed.
nextimpl_step_log_info = getInfoForLog();
if (current_file_segment_it == file_segments_holder->file_segments.end())
return;
@ -623,6 +639,18 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
if (!result)
{
#ifndef NDEBUG
/// Debug-only sanity check: when the implementation buffer reads from a local
/// cache file, that file must not be empty right before the actual read.
if (auto * cache_file_reader = typeid_cast<ReadBufferFromFile *>(implementation_buffer.get()))
{
    auto cache_file_size = cache_file_reader->size();
    if (cache_file_size == 0)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "Attempt to read from an empty cache file: {} (just before actual read)",
            /// Report which file is empty; the size is known to be zero here.
            cache_file_reader->getFileName());
}
#endif
result = implementation_buffer->next();
size = implementation_buffer->buffer().size();
}
@ -635,7 +663,12 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
if (file_segment->reserve(size))
{
file_segment->write(needed_to_predownload ? implementation_buffer->position() : implementation_buffer->buffer().begin(), size);
assert(file_segment->getDownloadOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
file_segment->write(needed_to_predownload ? implementation_buffer->position() : implementation_buffer->buffer().begin(), size, file_offset_of_buffer_end);
assert(file_segment->getDownloadOffset() <= file_segment->range().right + 1);
assert(std::next(current_file_segment_it) == file_segments_holder->file_segments.end() || file_segment->getDownloadOffset() == implementation_buffer->getFileOffsetOfBufferEnd());
}
else
{
@ -665,10 +698,15 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
}
}
/// - If last file segment was read from remote fs, then we read up to segment->range().right, but
/// the requested right boundary could be segment->range().left < requested_right_boundary < segment->range().right.
/// Therefore need to resize to a smaller size. And resize must be done after write into cache.
/// - If last file segment was read from local fs, then we could read more than file_segment->range().right, so resize is also needed.
if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end())
{
size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1;
size = std::min(size, remaining_size_to_read);
assert(implementation_buffer->buffer().size() >= nextimpl_working_buffer_offset + size);
implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size);
}
@ -692,9 +730,16 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
read_until_position, first_offset, file_segments_holder->toString());
if (size == 0 && file_offset_of_buffer_end < read_until_position)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Having zero bytes, but range is not finished: file offset: {}, reading until: {}",
file_offset_of_buffer_end, read_until_position);
{
    /// The implementation buffer is a local-file reader only for some read types.
    /// Probe its dynamic type with typeid_cast (same probe as the debug check before
    /// the read) so that a non-file buffer leaves cache_file_size unset and is
    /// reported as "None" instead of being treated as a ReadBufferFromFile.
    std::optional<size_t> cache_file_size;
    if (auto * cache_file_reader = typeid_cast<ReadBufferFromFile *>(implementation_buffer.get()))
        cache_file_size = cache_file_reader->size();

    throw Exception(ErrorCodes::LOGICAL_ERROR,
        "Having zero bytes, but range is not finished: file offset: {}, reading until: {}, read type: {}, cache file size: {}",
        file_offset_of_buffer_end, read_until_position, toString(read_type), cache_file_size ? std::to_string(*cache_file_size) : "None");
}
return result;
}
@ -757,12 +802,24 @@ std::optional<size_t> CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset()
String CachedReadBufferFromRemoteFS::getInfoForLog()
{
return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, file segment info: {}",
remote_fs_object_path, getHexUIntLowercase(cache_key), file_offset_of_buffer_end,
(implementation_buffer ?
std::to_string(implementation_buffer->getRemainingReadRange().left) + '-' + (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None")
: "None"),
(current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog()));
auto implementation_buffer_read_range_str =
implementation_buffer ?
std::to_string(implementation_buffer->getRemainingReadRange().left)
+ '-'
+ (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None")
: "None";
auto current_file_segment_info = current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog();
return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, "
"read_type: {}, last caller: {}, file segment info: {}",
remote_fs_object_path,
getHexUIntLowercase(cache_key),
file_offset_of_buffer_end,
implementation_buffer_read_range_str,
toString(read_type),
last_caller_id,
current_file_segment_info);
}
}

View File

@ -98,7 +98,10 @@ private:
}
__builtin_unreachable();
}
size_t first_offset = 0;
String nextimpl_step_log_info;
String last_caller_id;
};
}

View File

@ -68,16 +68,28 @@ std::future<IAsynchronousReader::Result> ThreadPoolRemoteFSReader::submit(Reques
auto * remote_fs_fd = assert_cast<RemoteFSFileDescriptor *>(request.descriptor.get());
Stopwatch watch(CLOCK_MONOTONIC);
auto [bytes_read, offset] = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore);
ReadBufferFromRemoteFSGather::ReadResult result;
try
{
result = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore);
}
catch (...)
{
if (running_group)
CurrentThread::detachQuery();
throw;
}
watch.stop();
ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, bytes_read);
if (running_group)
thread_status.detachQuery();
CurrentThread::detachQuery();
return Result{ .size = bytes_read, .offset = offset };
ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, result.offset ? result.size - result.offset : result.size);
return Result{ .size = result.size, .offset = result.offset };
});
auto future = task->get_future();

View File

@ -20,6 +20,7 @@
#include <Common/SipHash.h>
#include <Common/FieldVisitorHash.h>
#include <Access/Common/AccessFlags.h>
#include <Access/EnabledQuota.h>
#include <Formats/FormatFactory.h>
#include <base/logger_useful.h>
@ -197,6 +198,9 @@ void AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
copyData(*read_buf, write_buf);
}
if (auto quota = query_context->getQuota())
quota->used(QuotaType::WRITTEN_BYTES, bytes.size());
auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId());
InsertQuery key{query, settings};

View File

@ -69,6 +69,7 @@ public:
/// All below are parameters related to initial query.
Interface interface = Interface::TCP;
bool is_secure = false;
/// For tcp
String os_user;

View File

@ -1092,6 +1092,17 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression)
if (!res)
{
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this());
/// If the table function asks for a structure hint and this context has an
/// insertion table set, pass that table's column list to the function.
if (table_function_ptr->needStructureHint())
{
    const auto & insertion_table = getInsertionTable();
    if (!insertion_table.empty())
    {
        /// Keep the metadata pointer in a named local while its columns are used.
        /// Binding a reference straight into the chained temporaries would leave
        /// the columns alive only as long as some other owner holds the metadata.
        const auto insertion_table_metadata
            = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr();
        table_function_ptr->setStructureHint(insertion_table_metadata->columns);
    }
}
res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName());
/// Since ITableFunction::parseArguments() may change table_expression, i.e.:

View File

@ -15,6 +15,7 @@
#include <Common/isLocalAddress.h>
#include <base/types.h>
#include <Storages/MergeTree/ParallelReplicasReadingCoordinator.h>
#include <Storages/ColumnsDescription.h>
#include "config_core.h"

View File

@ -508,7 +508,9 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
default_expr_list->children.emplace_back(
setAlias(
col_decl.default_expression->clone(),
col_decl.default_specifier == "EPHEMERAL" ? /// can be ASTLiteral::value NULL
std::make_shared<ASTLiteral>(data_type_ptr->getDefault()) :
col_decl.default_expression->clone(),
tmp_column_name));
}
else
@ -536,7 +538,11 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
if (col_decl.default_expression)
{
ASTPtr default_expr = col_decl.default_expression->clone();
ASTPtr default_expr =
col_decl.default_specifier == "EPHEMERAL" && col_decl.default_expression->as<ASTLiteral>()->value.isNull() ?
std::make_shared<ASTLiteral>(DataTypeFactory::instance().get(col_decl.type)->getDefault()) :
col_decl.default_expression->clone();
if (col_decl.type)
column.type = name_type_it->type;
else

View File

@ -1,6 +1,7 @@
#include <Interpreters/InterpreterInsertQuery.h>
#include <Access/Common/AccessFlags.h>
#include <Access/EnabledQuota.h>
#include <Columns/ColumnNullable.h>
#include <Processors/Transforms/buildPushingToViewsChain.h>
#include <DataTypes/DataTypeNullable.h>
@ -51,6 +52,8 @@ InterpreterInsertQuery::InterpreterInsertQuery(
, async_insert(async_insert_)
{
checkStackSize();
if (auto quota = getContext()->getQuota())
quota->checkExceeded(QuotaType::WRITTEN_BYTES);
}
@ -269,7 +272,7 @@ Chain InterpreterInsertQuery::buildChainImpl(
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0));
}
auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), thread_status);
auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), thread_status, getContext()->getQuota());
counting->setProcessListElement(context_ptr->getProcessListElement());
out.addSource(std::move(counting));

View File

@ -86,6 +86,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"initial_query_start_time", std::make_shared<DataTypeDateTime>()},
{"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"interface", std::make_shared<DataTypeUInt8>()},
{"is_secure", std::make_shared<DataTypeUInt8>()},
{"os_user", std::make_shared<DataTypeString>()},
{"client_hostname", std::make_shared<DataTypeString>()},
{"client_name", std::make_shared<DataTypeString>()},
@ -275,6 +276,7 @@ void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableCo
columns[i++]->insert(client_info.initial_query_start_time_microseconds);
columns[i++]->insert(UInt64(client_info.interface));
columns[i++]->insert(static_cast<UInt64>(client_info.is_secure));
columns[i++]->insert(client_info.os_user);
columns[i++]->insert(client_info.client_hostname);

View File

@ -56,6 +56,7 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()
{"initial_query_start_time", std::make_shared<DataTypeDateTime>()},
{"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"interface", std::make_shared<DataTypeUInt8>()},
{"is_secure", std::make_shared<DataTypeUInt8>()},
{"os_user", std::make_shared<DataTypeString>()},
{"client_hostname", std::make_shared<DataTypeString>()},
{"client_name", std::make_shared<DataTypeString>()},

View File

@ -243,7 +243,7 @@ void Session::shutdownNamedSessions()
NamedSessionsStorage::instance().shutdown();
}
Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_)
Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure)
: auth_id(UUIDHelpers::generateV4()),
global_context(global_context_),
interface(interface_),
@ -251,6 +251,7 @@ Session::Session(const ContextPtr & global_context_, ClientInfo::Interface inter
{
prepared_client_info.emplace();
prepared_client_info->interface = interface_;
prepared_client_info->is_secure = is_secure;
}
Session::~Session()

View File

@ -32,7 +32,7 @@ public:
/// Stops using named sessions. The method must be called at the server shutdown.
static void shutdownNamedSessions();
Session(const ContextPtr & global_context_, ClientInfo::Interface interface_);
Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure = false);
~Session();
Session(const Session &&) = delete;

View File

@ -1,6 +1,7 @@
#include <Parsers/ASTColumnDeclaration.h>
#include <Common/quoteString.h>
#include <IO/Operators.h>
#include <Parsers/ASTLiteral.h>
namespace DB
@ -71,8 +72,12 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta
if (default_expression)
{
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' ';
default_expression->formatImpl(settings, state, frame);
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "");
if (default_specifier != "EPHEMERAL" || !default_expression->as<ASTLiteral>()->value.isNull())
{
settings.ostr << ' ';
default_expression->formatImpl(settings, state, frame);
}
}
if (comment)

View File

@ -505,32 +505,34 @@ namespace
bool parseExtract(IParser::Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr expr;
IParser::Pos begin = pos;
IntervalKind interval_kind;
if (!parseIntervalKind(pos, expected, interval_kind))
{
ASTPtr expr_list;
if (!ParserExpressionList(false, false).parse(pos, expr_list, expected))
return false;
auto res = std::make_shared<ASTFunction>();
res->name = "extract";
res->arguments = expr_list;
res->children.push_back(res->arguments);
node = std::move(res);
return true;
if (parseIntervalKind(pos, expected, interval_kind))
{
ASTPtr expr;
ParserKeyword s_from("FROM");
ParserExpression elem_parser;
if (s_from.ignore(pos, expected) && elem_parser.parse(pos, expr, expected))
{
node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr);
return true;
}
}
ParserKeyword s_from("FROM");
if (!s_from.ignore(pos, expected))
pos = begin;
ASTPtr expr_list;
if (!ParserExpressionList(false, false).parse(pos, expr_list, expected))
return false;
ParserExpression elem_parser;
if (!elem_parser.parse(pos, expr, expected))
return false;
node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr);
auto res = std::make_shared<ASTFunction>();
res->name = "extract";
res->arguments = expr_list;
res->children.push_back(res->arguments);
node = std::move(res);
return true;
}

View File

@ -9,6 +9,7 @@
#include <Parsers/CommonParsers.h>
#include <Parsers/ParserDataType.h>
#include <Poco/String.h>
#include <Parsers/ASTLiteral.h>
namespace DB
@ -185,8 +186,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
}
Pos pos_before_specifier = pos;
if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) ||
s_ephemeral.ignore(pos, expected) || s_alias.ignore(pos, expected))
if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) || s_alias.ignore(pos, expected))
{
default_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_before_specifier->end});
@ -194,6 +194,12 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
if (!expr_parser.parse(pos, default_expression, expected))
return false;
}
/// EPHEMERAL is handled separately from the other specifiers because its expression is optional.
else if (s_ephemeral.ignore(pos, expected))
{
default_specifier = "EPHEMERAL";
/// If no expression follows and a type was parsed, store a NULL literal
/// (default-constructed Field) as the default expression. Without a type
/// and without an expression, default_expression stays unset.
if (!expr_parser.parse(pos, default_expression, expected) && type)
default_expression = std::make_shared<ASTLiteral>(Field());
}
if (require_type && !type && !default_expression)
return false; /// reject column name without type

View File

@ -7,7 +7,7 @@ namespace DB
namespace ErrorCodes
{
extern const int TOO_MANY_ROWS_OR_BYTES;
extern const int QUOTA_EXPIRED;
extern const int QUOTA_EXCEEDED;
extern const int QUERY_WAS_CANCELLED;
}
@ -34,7 +34,7 @@ static bool checkCanAddAdditionalInfoToException(const DB::Exception & exception
{
/// Don't add additional info to limits and quota exceptions, and in case of kill query (to pass tests).
return exception.code() != ErrorCodes::TOO_MANY_ROWS_OR_BYTES
&& exception.code() != ErrorCodes::QUOTA_EXPIRED
&& exception.code() != ErrorCodes::QUOTA_EXCEEDED
&& exception.code() != ErrorCodes::QUERY_WAS_CANCELLED;
}

View File

@ -17,7 +17,7 @@ struct ColumnMapping
OptionalIndexes column_indexes_for_input_fields;
/// The list of column indexes that are not presented in input data.
std::vector<UInt8> not_presented_columns;
std::vector<size_t> not_presented_columns;
/// The list of column names in input data. Needed for better exception messages.
std::vector<String> names_of_columns;

View File

@ -98,8 +98,6 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
/// Skip prefix before names and types.
format_reader->skipPrefixBeforeHeader();
/// This is a bit of abstraction leakage, but we need it in parallel parsing:
/// we check if this InputFormat is working with the "real" beginning of the data.
if (with_names)
{
if (format_settings.with_names_use_header)

View File

@ -18,11 +18,12 @@ namespace DB
void CountingTransform::onConsume(Chunk chunk)
{
if (quota)
quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes());
Progress local_progress{WriteProgress(chunk.getNumRows(), chunk.bytes())};
progress.incrementPiecewiseAtomically(local_progress);
//std::cerr << "============ counting adding progress for " << static_cast<const void *>(thread_status) << ' ' << chunk.getNumRows() << " rows\n";
if (thread_status)
{
thread_status->performance_counters.increment(ProfileEvents::InsertedRows, local_progress.written_rows);

View File

@ -2,6 +2,7 @@
#include <IO/Progress.h>
#include <Processors/Transforms/ExceptionKeepingTransform.h>
#include <Access/EnabledQuota.h>
namespace DB
@ -14,8 +15,12 @@ class ThreadStatus;
class CountingTransform final : public ExceptionKeepingTransform
{
public:
explicit CountingTransform(const Block & header, ThreadStatus * thread_status_ = nullptr)
: ExceptionKeepingTransform(header, header), thread_status(thread_status_) {}
/// Builds a transform whose input and output headers are both `header`.
/// `thread_status_` and `quota_` are optional and default to nullptr;
/// the quota pointer is moved into the member.
explicit CountingTransform(
const Block & header,
ThreadStatus * thread_status_ = nullptr,
std::shared_ptr<const EnabledQuota> quota_ = nullptr)
: ExceptionKeepingTransform(header, header)
, thread_status(thread_status_), quota(std::move(quota_)) {}
String getName() const override { return "CountingTransform"; }
@ -47,6 +52,9 @@ protected:
ProgressCallback progress_callback;
QueryStatus * process_elem = nullptr;
ThreadStatus * thread_status = nullptr;
/// Quota is used to limit amount of written bytes.
std::shared_ptr<const EnabledQuota> quota;
Chunk cur_chunk;
};

View File

@ -759,44 +759,6 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter)
header.clear();
}
void Pipe::setOutputFormat(ProcessorPtr output)
{
if (output_ports.empty())
throw Exception("Cannot set output format to empty Pipe.", ErrorCodes::LOGICAL_ERROR);
if (output_ports.size() != 1)
throw Exception("Cannot set output format to Pipe because single output port is expected, "
"but it has " + std::to_string(output_ports.size()) + " ports", ErrorCodes::LOGICAL_ERROR);
auto * format = dynamic_cast<IOutputFormat * >(output.get());
if (!format)
throw Exception("IOutputFormat processor expected for QueryPipelineBuilder::setOutputFormat.",
ErrorCodes::LOGICAL_ERROR);
auto & main = format->getPort(IOutputFormat::PortKind::Main);
auto & totals = format->getPort(IOutputFormat::PortKind::Totals);
auto & extremes = format->getPort(IOutputFormat::PortKind::Extremes);
if (!totals_port)
addTotalsSource(std::make_shared<NullSource>(totals.getHeader()));
if (!extremes_port)
addExtremesSource(std::make_shared<NullSource>(extremes.getHeader()));
if (collected_processors)
collected_processors->emplace_back(output);
processors.emplace_back(std::move(output));
connect(*output_ports.front(), main);
connect(*totals_port, totals);
connect(*extremes_port, extremes);
output_ports.clear();
header.clear();
}
void Pipe::transform(const Transformer & transformer)
{
if (output_ports.empty())

View File

@ -141,7 +141,6 @@ private:
bool isCompleted() const { return !empty() && output_ports.empty(); }
static Pipe unitePipes(Pipes pipes, Processors * collected_processors, bool allow_empty_header);
void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter);
void setOutputFormat(ProcessorPtr output);
friend class QueryPipelineBuilder;
friend class QueryPipeline;

View File

@ -8,7 +8,6 @@
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
#include <Processors/Transforms/JoiningTransform.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Transforms/PartialSortingTransform.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
@ -247,21 +246,6 @@ void QueryPipelineBuilder::addExtremesTransform()
pipe.addTransform(std::move(transform), nullptr, port);
}
void QueryPipelineBuilder::setOutputFormat(ProcessorPtr output)
{
checkInitializedAndNotCompleted();
if (output_format)
throw Exception("QueryPipeline already has output.", ErrorCodes::LOGICAL_ERROR);
resize(1);
output_format = dynamic_cast<IOutputFormat * >(output.get());
pipe.setOutputFormat(std::move(output));
initRowsBeforeLimit();
}
QueryPipelineBuilder QueryPipelineBuilder::unitePipelines(
std::vector<std::unique_ptr<QueryPipelineBuilder>> pipelines,
size_t max_threads_limit,
@ -461,93 +445,6 @@ void QueryPipelineBuilder::setProcessListElement(QueryStatus * elem)
}
}
void QueryPipelineBuilder::initRowsBeforeLimit()
{
RowsBeforeLimitCounterPtr rows_before_limit_at_least;
/// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor.
std::vector<LimitTransform *> limits;
std::vector<RemoteSource *> remote_sources;
std::unordered_set<IProcessor *> visited;
struct QueuedEntry
{
IProcessor * processor;
bool visited_limit;
};
std::queue<QueuedEntry> queue;
queue.push({ output_format, false });
visited.emplace(output_format);
while (!queue.empty())
{
auto * processor = queue.front().processor;
auto visited_limit = queue.front().visited_limit;
queue.pop();
if (!visited_limit)
{
if (auto * limit = typeid_cast<LimitTransform *>(processor))
{
visited_limit = true;
limits.emplace_back(limit);
}
if (auto * source = typeid_cast<RemoteSource *>(processor))
remote_sources.emplace_back(source);
}
else if (auto * sorting = typeid_cast<PartialSortingTransform *>(processor))
{
if (!rows_before_limit_at_least)
rows_before_limit_at_least = std::make_shared<RowsBeforeLimitCounter>();
sorting->setRowsBeforeLimitCounter(rows_before_limit_at_least);
/// Don't go to children. Take rows_before_limit from last PartialSortingTransform.
continue;
}
/// Skip totals and extremes port for output format.
if (auto * format = dynamic_cast<IOutputFormat *>(processor))
{
auto * child_processor = &format->getPort(IOutputFormat::PortKind::Main).getOutputPort().getProcessor();
if (visited.emplace(child_processor).second)
queue.push({ child_processor, visited_limit });
continue;
}
for (auto & child_port : processor->getInputs())
{
auto * child_processor = &child_port.getOutputPort().getProcessor();
if (visited.emplace(child_processor).second)
queue.push({ child_processor, visited_limit });
}
}
if (!rows_before_limit_at_least && (!limits.empty() || !remote_sources.empty()))
{
rows_before_limit_at_least = std::make_shared<RowsBeforeLimitCounter>();
for (auto & limit : limits)
limit->setRowsBeforeLimitCounter(rows_before_limit_at_least);
for (auto & source : remote_sources)
source->setRowsBeforeLimitCounter(rows_before_limit_at_least);
}
/// If there is a limit, then enable rows_before_limit_at_least
/// It is needed when zero rows is read, but we still want rows_before_limit_at_least in result.
if (!limits.empty())
rows_before_limit_at_least->add(0);
if (rows_before_limit_at_least)
output_format->setRowsBeforeLimitCounter(rows_before_limit_at_least);
}
PipelineExecutorPtr QueryPipelineBuilder::execute()
{
if (!isCompleted())

View File

@ -10,8 +10,6 @@
namespace DB
{
class IOutputFormat;
class QueryPipelineProcessorsCollector;
struct AggregatingTransformParams;
@ -71,10 +69,6 @@ public:
void addTotalsHavingTransform(ProcessorPtr transform);
/// Add transform which calculates extremes. This transform adds extremes port and doesn't change inputs number.
void addExtremesTransform();
/// Resize pipeline to single output and add IOutputFormat. Pipeline will be completed after this transformation.
void setOutputFormat(ProcessorPtr output);
/// Get current OutputFormat.
IOutputFormat * getOutputFormat() const { return output_format; }
/// Sink is a processor with single input port and no output ports. Creates sink for each output port.
/// Pipeline will be completed after this transformation.
void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter);
@ -163,7 +157,6 @@ public:
private:
Pipe pipe;
IOutputFormat * output_format = nullptr;
/// Limit on the number of threads. Zero means no limit.
/// Sometimes, more streams are created than the number of threads for more optimal execution.
@ -174,8 +167,6 @@ private:
void checkInitialized();
void checkInitializedAndNotCompleted();
void initRowsBeforeLimit();
void setCollectedProcessors(Processors * processors);
friend class QueryPipelineProcessorsCollector;

View File

@ -922,7 +922,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
setThreadName("HTTPHandler");
ThreadStatus thread_status;
session = std::make_unique<Session>(server.context(), ClientInfo::Interface::HTTP);
session = std::make_unique<Session>(server.context(), ClientInfo::Interface::HTTP, request.isSecure());
SCOPE_EXIT({ session.reset(); });
std::optional<CurrentThread::QueryScope> query_scope;

View File

@ -110,7 +110,7 @@ void TCPHandler::runImpl()
setThreadName("TCPHandler");
ThreadStatus thread_status;
session = std::make_unique<Session>(server.context(), ClientInfo::Interface::TCP);
session = std::make_unique<Session>(server.context(), ClientInfo::Interface::TCP, socket().secure());
extractConnectionSettingsFromContext(server.context());
socket().setReceiveTimeout(receive_timeout);

View File

@ -25,6 +25,8 @@ namespace ErrorCodes
const String HDFSBuilderWrapper::CONFIG_PREFIX = "hdfs";
const String HDFS_URL_REGEXP = "^hdfs://[^/]*/.*";
std::once_flag init_libhdfs3_conf_flag;
void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config,
const String & prefix, bool isUser)
{
@ -123,19 +125,22 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A
throw Exception("Illegal HDFS URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
// Shall set env LIBHDFS3_CONF *before* HDFSBuilderWrapper construction.
String libhdfs3_conf = config.getString(HDFSBuilderWrapper::CONFIG_PREFIX + ".libhdfs3_conf", "");
if (!libhdfs3_conf.empty())
std::call_once(init_libhdfs3_conf_flag, [&config]()
{
if (std::filesystem::path{libhdfs3_conf}.is_relative() && !std::filesystem::exists(libhdfs3_conf))
String libhdfs3_conf = config.getString(HDFSBuilderWrapper::CONFIG_PREFIX + ".libhdfs3_conf", "");
if (!libhdfs3_conf.empty())
{
const String config_path = config.getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.remove_filename();
if (std::filesystem::exists(config_dir / libhdfs3_conf))
libhdfs3_conf = std::filesystem::absolute(config_dir / libhdfs3_conf);
if (std::filesystem::path{libhdfs3_conf}.is_relative() && !std::filesystem::exists(libhdfs3_conf))
{
const String config_path = config.getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.remove_filename();
if (std::filesystem::exists(config_dir / libhdfs3_conf))
libhdfs3_conf = std::filesystem::absolute(config_dir / libhdfs3_conf);
}
setenv("LIBHDFS3_CONF", libhdfs3_conf.c_str(), 1);
}
});
setenv("LIBHDFS3_CONF", libhdfs3_conf.c_str(), 1);
}
HDFSBuilderWrapper builder;
if (builder.get() == nullptr)
throw Exception("Unable to create builder to connect to HDFS: " +

View File

@ -22,8 +22,6 @@ ReadBufferFromHDFS::~ReadBufferFromHDFS() = default;
struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<SeekableReadBuffer>
{
/// HDFS create/open functions are not thread safe
static std::mutex hdfs_init_mutex;
String hdfs_uri;
String hdfs_file_path;
@ -46,8 +44,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
, builder(createHDFSBuilder(hdfs_uri_, config_))
, read_until_position(read_until_position_)
{
std::lock_guard lock(hdfs_init_mutex);
fs = createHDFSFS(builder.get());
fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0);
@ -59,7 +55,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
~ReadBufferFromHDFSImpl() override
{
std::lock_guard lock(hdfs_init_mutex);
hdfsCloseFile(fs.get(), fin);
}
@ -124,9 +119,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
}
};
std::mutex ReadBufferFromHDFS::ReadBufferFromHDFSImpl::hdfs_init_mutex;
ReadBufferFromHDFS::ReadBufferFromHDFS(
const String & hdfs_uri_,
const String & hdfs_file_path_,

View File

@ -26,6 +26,7 @@
#include <Storages/HDFS/ReadBufferFromHDFS.h>
#include <Storages/HDFS/WriteBufferFromHDFS.h>
#include <Storages/PartitionedSink.h>
#include <Storages/getVirtualsForStorage.h>
#include <Formats/ReadSchemaUtils.h>
#include <Formats/FormatFactory.h>
@ -164,6 +165,13 @@ StorageHDFS::StorageHDFS(
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
auto default_virtuals = NamesAndTypesList{
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList();
virtual_columns = getVirtualsForStorage(columns, default_virtuals);
}
ColumnsDescription StorageHDFS::getTableStructureFromData(
@ -273,36 +281,6 @@ private:
Strings::iterator uris_iter;
};
Block HDFSSource::getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column)
{
auto header = metadata_snapshot->getSampleBlock();
/// Note: AddingDefaultsBlockInputStream doesn't change header.
if (need_path_column)
header.insert(
{DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
"_path"});
if (need_file_column)
header.insert(
{DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
"_file"});
return header;
}
Block HDFSSource::getBlockForSource(
const StorageHDFSPtr & storage,
const StorageSnapshotPtr & storage_snapshot,
const ColumnsDescription & columns_description,
bool need_path_column,
bool need_file_column)
{
if (storage->isColumnOriented())
return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
else
return getHeader(storage_snapshot->metadata, need_path_column, need_file_column);
}
HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri)
: pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(context_, uri)) {}
@ -321,22 +299,28 @@ String HDFSSource::URISIterator::next()
return pimpl->next();
}
/// Builds the header of the source from the given sample block:
/// for every requested virtual column an empty column of the virtual column's type
/// is inserted under the virtual column's name.
/// The block is taken by value, so the caller's block is left untouched.
Block HDFSSource::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
{
    for (const auto & requested : requested_virtual_columns)
    {
        const auto & type = requested.type;
        sample_block.insert({type->createColumn(), type, requested.name});
    }

    return sample_block;
}
HDFSSource::HDFSSource(
StorageHDFSPtr storage_,
const StorageSnapshotPtr & storage_snapshot_,
const Block & block_for_format_,
const std::vector<NameAndTypePair> & requested_virtual_columns_,
ContextPtr context_,
UInt64 max_block_size_,
bool need_path_column_,
bool need_file_column_,
std::shared_ptr<IteratorWrapper> file_iterator_,
ColumnsDescription columns_description_)
: SourceWithProgress(getBlockForSource(storage_, storage_snapshot_, columns_description_, need_path_column_, need_file_column_))
: SourceWithProgress(getHeader(block_for_format_, requested_virtual_columns_))
, WithContext(context_)
, storage(std::move(storage_))
, storage_snapshot(storage_snapshot_)
, block_for_format(block_for_format_)
, requested_virtual_columns(requested_virtual_columns_)
, max_block_size(max_block_size_)
, need_path_column(need_path_column_)
, need_file_column(need_file_column_)
, file_iterator(file_iterator_)
, columns_description(std::move(columns_description_))
{
@ -361,14 +345,7 @@ bool HDFSSource::initialize()
auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method);
read_buf = wrapReadBufferWithCompressionMethod(std::make_unique<ReadBufferFromHDFS>(uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef()), compression);
auto get_block_for_format = [&]() -> Block
{
if (storage->isColumnOriented())
return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
return storage_snapshot->metadata->getSampleBlock();
};
auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, get_block_for_format(), max_block_size);
auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size);
QueryPipelineBuilder builder;
builder.init(Pipe(input_format));
@ -402,20 +379,21 @@ Chunk HDFSSource::generate()
Columns columns = chunk.getColumns();
UInt64 num_rows = chunk.getNumRows();
/// Enrich with virtual columns.
if (need_path_column)
for (const auto & virtual_column : requested_virtual_columns)
{
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, current_path);
columns.push_back(column->convertToFullColumnIfConst());
}
if (virtual_column.name == "_path")
{
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, current_path);
columns.push_back(column->convertToFullColumnIfConst());
}
else if (virtual_column.name == "_file")
{
size_t last_slash_pos = current_path.find_last_of('/');
auto file_name = current_path.substr(last_slash_pos + 1);
if (need_file_column)
{
size_t last_slash_pos = current_path.find_last_of('/');
auto file_name = current_path.substr(last_slash_pos + 1);
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, std::move(file_name));
columns.push_back(column->convertToFullColumnIfConst());
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, std::move(file_name));
columns.push_back(column->convertToFullColumnIfConst());
}
}
return Chunk(std::move(columns), num_rows);
@ -526,17 +504,6 @@ Pipe StorageHDFS::read(
size_t max_block_size,
unsigned num_streams)
{
bool need_path_column = false;
bool need_file_column = false;
for (const auto & column : column_names)
{
if (column == "_path")
need_path_column = true;
if (column == "_file")
need_file_column = true;
}
std::shared_ptr<HDFSSource::IteratorWrapper> iterator_wrapper{nullptr};
if (distributed_processing)
{
@ -563,27 +530,51 @@ Pipe StorageHDFS::read(
});
}
std::unordered_set<String> column_names_set(column_names.begin(), column_names.end());
std::vector<NameAndTypePair> requested_virtual_columns;
for (const auto & virtual_column : getVirtuals())
{
if (column_names_set.contains(virtual_column.name))
requested_virtual_columns.push_back(virtual_column);
}
ColumnsDescription columns_description;
Block block_for_format;
if (isColumnOriented())
{
auto fetch_columns = column_names;
const auto & virtuals = getVirtuals();
std::erase_if(
fetch_columns,
[&](const String & col)
{ return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); });
if (fetch_columns.empty())
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
columns_description = ColumnsDescription{
storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()};
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
}
else
{
columns_description = storage_snapshot->metadata->getColumns();
block_for_format = storage_snapshot->metadata->getSampleBlock();
}
Pipes pipes;
auto this_ptr = std::static_pointer_cast<StorageHDFS>(shared_from_this());
for (size_t i = 0; i < num_streams; ++i)
{
const auto get_columns_for_format = [&]() -> ColumnsDescription
{
if (isColumnOriented())
return ColumnsDescription{storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()};
else
return storage_snapshot->metadata->getColumns();
};
pipes.emplace_back(std::make_shared<HDFSSource>(
this_ptr,
storage_snapshot,
block_for_format,
requested_virtual_columns,
context_,
max_block_size,
need_path_column,
need_file_column,
iterator_wrapper,
get_columns_for_format()));
columns_description));
}
return Pipe::unitePipes(std::move(pipes));
}
@ -715,9 +706,7 @@ void registerStorageHDFS(StorageFactory & factory)
NamesAndTypesList StorageHDFS::getVirtuals() const
{
return NamesAndTypesList{
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
return virtual_columns;
}
}

View File

@ -76,6 +76,7 @@ private:
const bool distributed_processing;
ASTPtr partition_by;
bool is_path_with_globs;
NamesAndTypesList virtual_columns;
Poco::Logger * log = &Poco::Logger::get("StorageHDFS");
};
@ -110,25 +111,14 @@ public:
using IteratorWrapper = std::function<String()>;
using StorageHDFSPtr = std::shared_ptr<StorageHDFS>;
static Block getHeader(
const StorageMetadataPtr & metadata_snapshot,
bool need_path_column,
bool need_file_column);
static Block getBlockForSource(
const StorageHDFSPtr & storage,
const StorageSnapshotPtr & storage_snapshot_,
const ColumnsDescription & columns_description,
bool need_path_column,
bool need_file_column);
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
HDFSSource(
StorageHDFSPtr storage_,
const StorageSnapshotPtr & storage_snapshot_,
const Block & block_for_format_,
const std::vector<NameAndTypePair> & requested_virtual_columns_,
ContextPtr context_,
UInt64 max_block_size_,
bool need_path_column_,
bool need_file_column_,
std::shared_ptr<IteratorWrapper> file_iterator_,
ColumnsDescription columns_description_);
@ -140,7 +130,8 @@ public:
private:
StorageHDFSPtr storage;
StorageSnapshotPtr storage_snapshot;
Block block_for_format;
std::vector<NameAndTypePair> requested_virtual_columns;
UInt64 max_block_size;
bool need_path_column;
bool need_file_column;

View File

@ -742,6 +742,7 @@ void registerStorageHive(StorageFactory & factory)
StorageFactory::StorageFeatures{
.supports_settings = true,
.supports_sort_order = true,
.source_access_type = AccessType::HIVE,
});
}

View File

@ -635,24 +635,35 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
/// Motivation: memory for index is shared between queries - not belong to the query itself.
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker(VariableContext::Global);
loadUUID();
loadColumns(require_columns_checksums);
loadChecksums(require_columns_checksums);
loadIndexGranularity();
calculateColumnsAndSecondaryIndicesSizesOnDisk();
loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity`
loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`.
loadPartitionAndMinMaxIndex();
if (!parent_part)
try
{
loadTTLInfos();
loadProjections(require_columns_checksums, check_consistency);
loadUUID();
loadColumns(require_columns_checksums);
loadChecksums(require_columns_checksums);
loadIndexGranularity();
calculateColumnsAndSecondaryIndicesSizesOnDisk();
loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity`
loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`.
loadPartitionAndMinMaxIndex();
if (!parent_part)
{
loadTTLInfos();
loadProjections(require_columns_checksums, check_consistency);
}
if (check_consistency)
checkConsistency(require_columns_checksums);
loadDefaultCompressionCodec();
}
catch (...)
{
// The data part being loaded may turn out to be broken after some of its meta information has already been
// written into the metadata; in that case everything written before the exception must be cleaned up.
metadata_manager->deleteAll(/*include_projection*/ true);
metadata_manager->assertAllDeleted(/*include_projection*/ true);
throw;
}
if (check_consistency)
checkConsistency(require_columns_checksums);
loadDefaultCompressionCodec();
}
void IMergeTreeDataPart::appendFilesOfColumnsChecksumsIndexes(Strings & files, bool include_projection) const

View File

@ -1314,9 +1314,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
if (!parts_from_wal.empty())
loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal, part_lock);
for (auto & part : duplicate_parts_to_remove)
part->remove();
for (auto & part : broken_parts_to_detach)
part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes

View File

@ -25,6 +25,7 @@
#include <Storages/StorageS3Settings.h>
#include <Storages/StorageSnapshot.h>
#include <Storages/PartitionedSink.h>
#include <Storages/getVirtualsForStorage.h>
#include <IO/ReadBufferFromS3.h>
#include <IO/WriteBufferFromS3.h>
@ -210,25 +211,16 @@ String StorageS3Source::KeysIterator::next()
return pimpl->next();
}
Block StorageS3Source::getHeader(Block sample_block, bool with_path_column, bool with_file_column)
Block StorageS3Source::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
{
if (with_path_column)
sample_block.insert(
{DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
"_path"});
if (with_file_column)
sample_block.insert(
{DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
"_file"});
for (const auto & virtual_column : requested_virtual_columns)
sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name});
return sample_block;
}
StorageS3Source::StorageS3Source(
bool need_path,
bool need_file,
const std::vector<NameAndTypePair> & requested_virtual_columns_,
const String & format_,
String name_,
const Block & sample_block_,
@ -242,7 +234,7 @@ StorageS3Source::StorageS3Source(
const String & bucket_,
std::shared_ptr<IteratorWrapper> file_iterator_,
const size_t download_thread_num_)
: SourceWithProgress(getHeader(sample_block_, need_path, need_file))
: SourceWithProgress(getHeader(sample_block_, requested_virtual_columns_))
, WithContext(context_)
, name(std::move(name_))
, bucket(bucket_)
@ -254,8 +246,7 @@ StorageS3Source::StorageS3Source(
, client(client_)
, sample_block(sample_block_)
, format_settings(format_settings_)
, with_file_column(need_file)
, with_path_column(need_path)
, requested_virtual_columns(requested_virtual_columns_)
, file_iterator(file_iterator_)
, download_thread_num(download_thread_num_)
{
@ -344,16 +335,18 @@ Chunk StorageS3Source::generate()
{
UInt64 num_rows = chunk.getNumRows();
if (with_path_column)
chunk.addColumn(DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(num_rows, file_path)
->convertToFullColumnIfConst());
if (with_file_column)
for (const auto & virtual_column : requested_virtual_columns)
{
size_t last_slash_pos = file_path.find_last_of('/');
chunk.addColumn(DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(num_rows, file_path.substr(last_slash_pos + 1))
->convertToFullColumnIfConst());
if (virtual_column.name == "_path")
{
chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst());
}
else if (virtual_column.name == "_file")
{
size_t last_slash_pos = file_path.find_last_of('/');
auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1));
chunk.addColumn(column->convertToFullColumnIfConst());
}
}
return chunk;
@ -627,6 +620,13 @@ StorageS3::StorageS3(
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
auto default_virtuals = NamesAndTypesList{
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList();
virtual_columns = getVirtualsForStorage(columns, default_virtuals);
}
std::shared_ptr<StorageS3Source::IteratorWrapper> StorageS3::createFileIterator(const ClientAuthentication & client_auth, const std::vector<String> & keys, bool is_key_with_globs, bool distributed_processing, ContextPtr local_context)
@ -674,14 +674,14 @@ Pipe StorageS3::read(
updateClientAndAuthSettings(local_context, client_auth);
Pipes pipes;
bool need_path_column = false;
bool need_file_column = false;
for (const auto & column : column_names)
std::unordered_set<String> column_names_set(column_names.begin(), column_names.end());
std::vector<NameAndTypePair> requested_virtual_columns;
for (const auto & virtual_column : getVirtuals())
{
if (column == "_path")
need_path_column = true;
if (column == "_file")
need_file_column = true;
if (column_names_set.contains(virtual_column.name))
requested_virtual_columns.push_back(virtual_column);
}
std::shared_ptr<StorageS3Source::IteratorWrapper> iterator_wrapper = createFileIterator(client_auth, keys, is_key_with_globs, distributed_processing, local_context);
@ -690,8 +690,18 @@ Pipe StorageS3::read(
Block block_for_format;
if (isColumnOriented())
{
auto fetch_columns = column_names;
const auto & virtuals = getVirtuals();
std::erase_if(
fetch_columns,
[&](const String & col)
{ return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); });
if (fetch_columns.empty())
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
columns_description = ColumnsDescription{
storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()};
storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()};
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
}
else
@ -704,8 +714,7 @@ Pipe StorageS3::read(
for (size_t i = 0; i < num_streams; ++i)
{
pipes.emplace_back(std::make_shared<StorageS3Source>(
need_path_column,
need_file_column,
requested_virtual_columns,
format_name,
getName(),
block_for_format,
@ -882,6 +891,8 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
configuration.access_key_id = arg_value->as<ASTLiteral>()->value.safeGet<String>();
else if (arg_name == "secret_access_key")
configuration.secret_access_key = arg_value->as<ASTLiteral>()->value.safeGet<String>();
else if (arg_name == "filename")
configuration.url = std::filesystem::path(configuration.url) / arg_value->as<ASTLiteral>()->value.safeGet<String>();
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].",
@ -1081,9 +1092,7 @@ void registerStorageCOS(StorageFactory & factory)
NamesAndTypesList StorageS3::getVirtuals() const
{
return NamesAndTypesList{
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
return virtual_columns;
}
bool StorageS3::supportsPartitionBy() const

View File

@ -58,11 +58,10 @@ public:
using IteratorWrapper = std::function<String()>;
static Block getHeader(Block sample_block, bool with_path_column, bool with_file_column);
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
StorageS3Source(
bool need_path,
bool need_file,
const std::vector<NameAndTypePair> & requested_virtual_columns_,
const String & format,
String name_,
const Block & sample_block,
@ -102,8 +101,7 @@ private:
std::unique_ptr<PullingPipelineExecutor> reader;
/// onCancel and generate can be called concurrently
std::mutex reader_mutex;
bool with_file_column = false;
bool with_path_column = false;
std::vector<NameAndTypePair> requested_virtual_columns;
std::shared_ptr<IteratorWrapper> file_iterator;
size_t download_thread_num = 1;
@ -196,6 +194,7 @@ private:
ClientAuthentication client_auth;
std::vector<String> keys;
NamesAndTypesList virtual_columns;
String format_name;
UInt64 max_single_read_retries;

View File

@ -467,7 +467,7 @@ std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
InterpreterSelectQuery fetch(
getFetchColumnQuery(w_start, watermark),
window_view_context,
getContext(),
getInnerStorage(),
nullptr,
SelectQueryOptions(QueryProcessingStage::FetchColumns));
@ -509,11 +509,11 @@ std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
return StorageBlocks::createStorage(blocks_id_global, required_columns, std::move(pipes), QueryProcessingStage::WithMergeableState);
};
TemporaryTableHolder blocks_storage(window_view_context, creator);
TemporaryTableHolder blocks_storage(getContext(), creator);
InterpreterSelectQuery select(
getFinalQuery(),
window_view_context,
getContext(),
blocks_storage.getTable(),
blocks_storage.getTable()->getInMemoryMetadataPtr(),
SelectQueryOptions(QueryProcessingStage::Complete));
@ -617,8 +617,8 @@ std::shared_ptr<ASTCreateQuery> StorageWindowView::getInnerTableCreateQuery(
auto t_sample_block
= InterpreterSelectQuery(
inner_select_query, window_view_context, getParentStorage(), nullptr,
SelectQueryOptions(QueryProcessingStage::WithMergeableState)) .getSampleBlock();
inner_select_query, getContext(), getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::WithMergeableState))
.getSampleBlock();
auto columns_list = std::make_shared<ASTExpressionList>();
@ -891,7 +891,7 @@ void StorageWindowView::updateMaxWatermark(UInt32 watermark)
inline void StorageWindowView::cleanup()
{
InterpreterAlterQuery alter_query(getCleanupQuery(), window_view_context);
InterpreterAlterQuery alter_query(getCleanupQuery(), getContext());
alter_query.execute();
std::lock_guard lock(fire_signal_mutex);
@ -999,9 +999,6 @@ StorageWindowView::StorageWindowView(
, WithContext(context_->getGlobalContext())
, log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name)))
{
window_view_context = Context::createCopy(getContext());
window_view_context->makeQueryContext();
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(columns_);
setInMemoryMetadata(storage_metadata);
@ -1089,11 +1086,11 @@ StorageWindowView::StorageWindowView(
clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds();
next_fire_signal = getWindowUpperBound(std::time(nullptr));
clean_cache_task = window_view_context->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); });
clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); });
if (is_proctime)
fire_task = window_view_context->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireProc(); });
fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireProc(); });
else
fire_task = window_view_context->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); });
fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); });
clean_cache_task->deactivate();
fire_task->deactivate();
}
@ -1424,9 +1421,10 @@ Block & StorageWindowView::getHeader() const
std::lock_guard lock(sample_block_lock);
if (!sample_block)
{
sample_block = InterpreterSelectQuery(
select_query->clone(), window_view_context, getParentStorage(), nullptr,
SelectQueryOptions(QueryProcessingStage::Complete)).getSampleBlock();
sample_block
= InterpreterSelectQuery(
select_query->clone(), getContext(), getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete))
.getSampleBlock();
/// convert all columns to full columns
/// in case some of them are constant
for (size_t i = 0; i < sample_block.columns(); ++i)

View File

@ -157,7 +157,6 @@ private:
/// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY tumble(____timestamp, *)
ASTPtr final_query;
ContextMutablePtr window_view_context;
bool is_proctime{true};
bool is_time_column_func_now;
bool is_tumble; // false if is hop
@ -182,7 +181,6 @@ private:
/// Mutex for the blocks and ready condition
std::mutex mutex;
std::mutex flush_table_mutex;
std::shared_mutex fire_signal_mutex;
mutable std::mutex sample_block_lock; /// Mutex to protect access to sample block and inner_blocks_query

View File

@ -0,0 +1,22 @@
#include "getVirtualsForStorage.h"
namespace DB
{
/// Returns the default virtual columns that are not shadowed by a storage column:
/// an entry of `default_virtuals_` is dropped when `storage_columns_` contains a column with the same name.
/// Only names are compared when matching; types are ignored.
/// The inputs are not modified; sorted copies are used, so the result follows the sorted order.
NamesAndTypesList getVirtualsForStorage(const NamesAndTypesList & storage_columns_, const NamesAndTypesList & default_virtuals_)
{
    /// std::set_difference needs both ranges sorted, hence the local sorted copies.
    /// NOTE(review): assumes the default ordering of NameAndTypePair is consistent with ordering by name - confirm.
    NamesAndTypesList sorted_virtuals = default_virtuals_;
    sorted_virtuals.sort();

    NamesAndTypesList sorted_columns = storage_columns_;
    sorted_columns.sort();

    const auto name_less = [](const NameAndTypePair & lhs, const NameAndTypePair & rhs)
    {
        return lhs.name < rhs.name;
    };

    NamesAndTypesList remaining_virtuals;
    std::set_difference(
        sorted_virtuals.begin(), sorted_virtuals.end(),
        sorted_columns.begin(), sorted_columns.end(),
        std::back_inserter(remaining_virtuals),
        name_less);

    return remaining_virtuals;
}
}

View File

@ -0,0 +1,9 @@
#pragma once
#include <Core/NamesAndTypes.h>
namespace DB
{
/// Returns the entries of `default_virtuals_` whose names do not coincide with the name
/// of any column in `storage_columns_`. Neither argument is modified.
NamesAndTypesList getVirtualsForStorage(const NamesAndTypesList & storage_columns_, const NamesAndTypesList & default_virtuals_);
}

View File

@ -10,7 +10,7 @@ class TableFunctionHive : public ITableFunction
{
public:
static constexpr auto name = "hive";
static constexpr auto storage_type_name = "hive";
static constexpr auto storage_type_name = "Hive";
std::string getName() const override { return name; }
bool hasStaticStructure() const override { return true; }

View File

@ -14,11 +14,16 @@ namespace ProfileEvents
namespace DB
{
/// Default implementation: asks StorageFactory for the source access type
/// registered under this table function's storage type name.
AccessType ITableFunction::getSourceAccessType() const
{
    const char * storage_type_name = getStorageTypeName();
    return StorageFactory::instance().getSourceAccessType(storage_type_name);
}
StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name,
ColumnsDescription cached_columns, bool use_global_context) const
{
ProfileEvents::increment(ProfileEvents::TableFunctionExecute);
context->checkAccess(AccessType::CREATE_TEMPORARY_TABLE | StorageFactory::instance().getSourceAccessType(getStorageTypeName()));
context->checkAccess(AccessType::CREATE_TEMPORARY_TABLE | getSourceAccessType());
auto context_to_use = use_global_context ? context->getGlobalContext() : context;

View File

@ -3,6 +3,7 @@
#include <Parsers/IAST_fwd.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/ColumnsDescription.h>
#include <Access/Common/AccessType.h>
#include <memory>
#include <string>
@ -71,7 +72,10 @@ public:
private:
virtual StoragePtr executeImpl(
const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const = 0;
virtual const char * getStorageTypeName() const = 0;
virtual AccessType getSourceAccessType() const;
};
using TableFunctionPtr = std::shared_ptr<ITableFunction>;

View File

@ -39,6 +39,8 @@ protected:
const char * getStorageTypeName() const override { return "HDFSCluster"; }
AccessType getSourceAccessType() const override { return AccessType::HDFS; }
ColumnsDescription getActualTableStructure(ContextPtr) const override;
void parseArguments(const ASTPtr &, ContextPtr) override;

View File

@ -18,6 +18,7 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
}
void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr context)
@ -29,6 +30,12 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr
auto args = function->arguments->children;
if (args.empty())
{
structure = "auto";
return;
}
if (args.size() != 1)
throw Exception("Table function '" + getName() + "' requires exactly 1 argument: structure",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
@ -38,6 +45,16 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr
/// Returns the structure of the table produced by the `input` table function.
///  - If `structure` is anything other than "auto", it is parsed as a columns list.
///  - If `structure` is "auto", the previously stored structure hint is returned.
/// Throws CANNOT_EXTRACT_TABLE_STRUCTURE when `structure` is "auto" and the hint is empty.
ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context) const
{
    /// An explicitly given structure is always used as is.
    if (structure != "auto")
        return parseColumnsListFromString(structure, context);

    /// No explicit structure: fall back to the hint, if there is one.
    if (!structure_hint.empty())
        return structure_hint;

    throw Exception(
        ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
        "Table function '{}' was used without structure argument but structure could not be determined automatically. Please, "
        "provide structure manually",
        getName());
}

View File

@ -16,6 +16,8 @@ public:
static constexpr auto name = "input";
std::string getName() const override { return name; }
bool hasStaticStructure() const override { return true; }
bool needStructureHint() const override { return true; }
void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; }
private:
StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override;
@ -25,6 +27,7 @@ private:
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
String structure;
ColumnsDescription structure_hint;
};
}

View File

@ -12,6 +12,7 @@
#include <Storages/StorageS3.h>
#include <Formats/FormatFactory.h>
#include "registerTableFunctions.h"
#include <filesystem>
namespace DB
@ -37,6 +38,8 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar
s3_configuration.access_key_id = arg_value->as<ASTLiteral>()->value.safeGet<String>();
else if (arg_name == "secret_access_key")
s3_configuration.secret_access_key = arg_value->as<ASTLiteral>()->value.safeGet<String>();
else if (arg_name == "filename")
s3_configuration.url = std::filesystem::path(s3_configuration.url) / arg_value->as<ASTLiteral>()->value.safeGet<String>();
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message);
}

View File

@ -45,6 +45,8 @@ protected:
const char * getStorageTypeName() const override { return "S3Cluster"; }
AccessType getSourceAccessType() const override { return AccessType::S3; }
ColumnsDescription getActualTableStructure(ContextPtr) const override;
void parseArguments(const ASTPtr &, ContextPtr) override;

View File

@ -264,7 +264,7 @@ def main():
version_type = "stable"
official_flag = True
update_version_local(REPO_COPY, version, version_type)
update_version_local(version, version_type)
logging.info("Updated local files with version")

View File

@ -397,17 +397,19 @@ def main():
images_dict = get_images_dict(GITHUB_WORKSPACE, "docker/images.json")
pr_info = PRInfo()
if args.all:
pr_info = PRInfo()
pr_info.changed_files = set(images_dict.keys())
elif args.image_path:
pr_info = PRInfo()
pr_info.changed_files = set(i for i in args.image_path)
else:
pr_info = PRInfo(need_changed_files=True)
pr_info.fetch_changed_files()
changed_images = get_changed_docker_images(pr_info, images_dict)
logging.info("Has changed images %s", ", ".join([im.path for im in changed_images]))
if changed_images:
logging.info(
"Has changed images: %s", ", ".join([im.path for im in changed_images])
)
image_versions, result_version = gen_versions(pr_info, args.suffix)

View File

@ -56,18 +56,20 @@ def get_images_with_versions(
for i in range(10):
try:
logging.info("Pulling image %s", docker_image)
latest_error = subprocess.check_output(
subprocess.check_output(
f"docker pull {docker_image}",
stderr=subprocess.STDOUT,
shell=True,
)
break
except Exception as ex:
latest_error = ex
time.sleep(i * 3)
logging.info("Got execption pulling docker %s", ex)
else:
raise Exception(
f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}"
"Cannot pull dockerhub for image docker pull "
f"{docker_image} because of {latest_error}"
)
return docker_images

360
tests/ci/docker_server.py Normal file
View File

@ -0,0 +1,360 @@
#!/usr/bin/env python
# here
import argparse
import json
import logging
import subprocess
from os import path as p, makedirs
from typing import List, Tuple
from github import Github
from build_check import get_release_or_pr
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from docker_images_check import DockerImage
from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
from s3_helper import S3Helper
from stopwatch import Stopwatch
from upload_result_helper import upload_results
from version_helper import (
ClickHouseVersion,
get_tagged_versions,
get_version_from_repo,
get_version_from_string,
)
TEMP_PATH = p.join(RUNNER_TEMP, "docker_images_check")
BUCKETS = {"amd64": "package_release", "arm64": "package_aarch64"}
class DelOS(argparse.Action):
    """argparse action for `--no-<os>` flags: drops `<os>` from `namespace.os`."""

    def __call__(self, _, namespace, __, option_string=None):
        # The destination is derived from the flag name, e.g. `--no-ubuntu`
        # gives `no_ubuntu`; strip the `no_` prefix to get the OS name.
        target = self.dest
        if target.startswith("no_"):
            target = target[3:]
        # The flag may be repeated or the OS may already be gone, so only
        # remove it when it is still present.
        if target in namespace.os:
            namespace.os.remove(target)
def parse_args() -> argparse.Namespace:
    """Build the command line parser of the image builder and parse `sys.argv`.

    Boolean switches are modelled as a hidden positive option that carries the
    default plus a visible `--no-*` flag that turns it off; the list of OS
    flavours is reduced by the `--no-<os>` flags through the `DelOS` action.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="A program to build clickhouse-server image, both alpine and "
        "ubuntu versions",
    )

    parser.add_argument(
        "--version",
        type=version_arg,
        default=get_version_from_repo().string,
        help="a version to build",
    )
    parser.add_argument(
        "--release-type",
        type=str,
        choices=("auto", "latest", "major", "minor", "patch", "head"),
        default="head",
        help="version part that will be updated when '--version' is set; "
        "'auto' is a special case, it will get versions from github and detect the "
        "release type (latest, major, minor or patch) automatically",
    )
    parser.add_argument(
        "--image-path",
        type=str,
        default="docker/server",
        help="a path to docker context directory",
    )
    parser.add_argument(
        "--image-repo",
        type=str,
        default="clickhouse/clickhouse-server",
        help="image name on docker hub",
    )
    parser.add_argument(
        "--bucket-prefix",
        help="if set, then is used as source for deb and tgz files",
    )

    # (hidden option holding the default, visible negative flag, dest, help)
    switches = (
        ("--reports", "--no-reports", "reports", "don't push reports to S3 and github"),
        ("--push", "--no-push-images", "push", "don't push images to docker hub"),
    )
    for hidden_flag, negative_flag, dest, help_text in switches:
        parser.add_argument(hidden_flag, default=True, help=argparse.SUPPRESS)
        parser.add_argument(
            negative_flag,
            action="store_false",
            dest=dest,
            default=argparse.SUPPRESS,
            help=help_text,
        )

    parser.add_argument("--os", default=["ubuntu", "alpine"], help=argparse.SUPPRESS)
    for flavour in ("ubuntu", "alpine"):
        parser.add_argument(
            f"--no-{flavour}",
            action=DelOS,
            nargs=0,
            default=argparse.SUPPRESS,
            help=f"don't build {flavour} image",
        )

    return parser.parse_args()
def version_arg(version: str) -> ClickHouseVersion:
    """argparse `type=` converter for `--version`.

    Parses `version` with `get_version_from_string`. A `ValueError` from the
    parser is converted to `argparse.ArgumentTypeError`, which argparse reports
    as a regular usage error; the original error is kept as `__cause__`.
    """
    try:
        return get_version_from_string(version)
    except ValueError as e:
        # Chain explicitly so the traceback shows this is a translation of the
        # parse error rather than a second failure inside the handler.
        raise argparse.ArgumentTypeError(e) from e
def auto_release_type(version: ClickHouseVersion, release_type: str) -> str:
    """Resolve the special release type "auto" to a concrete one.

    Any `release_type` other than "auto" is returned unchanged. For "auto" the
    tagged versions from `get_tagged_versions()` are scanned from the end for
    the last tag that is not greater than `version`:

    - if that is the last tag overall, `version` is the newest: "latest";
    - otherwise the next tag is the reference, and the first of major, minor,
      patch in which `version` is lower than the reference gives the type.

    Raises ValueError when there are no tagged versions to compare with, or
    when `version` differs from the reference only in the tweak part.
    """
    if release_type != "auto":
        return release_type

    git_versions = get_tagged_versions()
    if not git_versions:
        # Fail with a readable message rather than an IndexError below.
        raise ValueError(
            "Cannot detect release type automatically: no tagged versions found"
        )

    reference_version = git_versions[0]
    for i in reversed(range(len(git_versions))):
        # `<=`, not `<`: when `version` itself is already tagged, the reference
        # must be the NEXT tag. With a strict comparison the version would be
        # compared against itself and always hit the 'tweak' error below.
        if git_versions[i] <= version:
            if i == len(git_versions) - 1:
                return "latest"
            reference_version = git_versions[i + 1]
            break

    if version.major < reference_version.major:
        return "major"
    if version.minor < reference_version.minor:
        return "minor"
    if version.patch < reference_version.patch:
        return "patch"

    raise ValueError(
        "Release type 'tweak' is not supported for "
        f"{version.string} < {reference_version.string}"
    )
def gen_tags(version: ClickHouseVersion, release_type: str) -> List[str]:
    """Return the docker tags to publish for `version` and `release_type`.

    Example for version 22.2.2.2:

    latest: latest, 22, 22.2, 22.2.2, 22.2.2.2
    major:  22, 22.2, 22.2.2, 22.2.2.2
    minor:  22.2, 22.2.2, 22.2.2.2
    patch:  22.2.2, 22.2.2.2
    head:   head

    Any other `release_type` raises ValueError.
    """
    parts = version.string.split(".")

    if release_type == "head":
        return [release_type]

    # Index of the shortest version prefix that gets its own tag.
    shortest_prefix = {"latest": 0, "major": 0, "minor": 1, "patch": 2}
    if release_type not in shortest_prefix:
        raise ValueError(f"{release_type} is not valid release part")

    tags = [release_type] if release_type == "latest" else []
    start = shortest_prefix[release_type]
    tags.extend(".".join(parts[:end]) for end in range(start + 1, len(parts) + 1))
    return tags
def buildx_args(bucket_prefix: str, arch: str) -> List[str]:
    """Return the per-architecture arguments for `docker buildx build`.

    The platform and the build-url label are always present. When
    `bucket_prefix` is non-empty, two build args pointing at the package
    location for `arch` are appended.
    """
    common = [f"--platform=linux/{arch}", f"--label=build-url={GITHUB_RUN_URL}"]
    if not bucket_prefix:
        return common

    # join() rather than plain concatenation to prevent a double //
    url = p.join(bucket_prefix, BUCKETS[arch])
    return common + [
        f"--build-arg=REPOSITORY='{url}'",
        f"--build-arg=deb_location_url='{url}'",
    ]
def _stream_command(cmd: str) -> int:
    """Run `cmd` through the shell, echo its merged stdout/stderr, return the exit code."""
    with subprocess.Popen(
        cmd,
        shell=True,
        stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    ) as process:
        for line in process.stdout:  # type: ignore
            print(line, end="")
        return process.wait()


def build_and_push_image(
    image: DockerImage,
    push: bool,
    bucket_prefix: str,
    os: str,
    tag: str,
    version: ClickHouseVersion,
) -> List[Tuple[str, str]]:
    """Build `image` for every architecture in BUCKETS and optionally push it.

    One `docker buildx build` is run per architecture. With `push` set, the
    images are pushed by digest and afterwards merged under a single
    `<repo>:<tag>` with `docker buildx imagetools create`; otherwise separate
    `<repo>:<tag>-<arch>` images are created locally.

    Returns a list of `(image name, "OK" | "FAIL")` pairs. The function returns
    right after the first failed architecture build.
    """
    result = []
    # Ubuntu is the default flavour and gets no tag suffix.
    if os != "ubuntu":
        tag += f"-{os}"

    base_args = ["docker", "buildx", "build"]
    if push:
        base_args += [
            "--push",
            "--output=type=image,push-by-digest=true",
            f"--tag={image.repo}",
        ]
    else:
        base_args.append("--output=type=docker")

    # `docker buildx build --load` does not support multiple images currently
    # images must be built separately and merged together with `docker manifest`
    digests = []
    for arch in BUCKETS:
        arch_tag = f"{tag}-{arch}"
        metadata_path = p.join(TEMP_PATH, arch_tag)
        dockerfile = p.join(image.full_path, f"Dockerfile.{os}")

        cmd_args = base_args + buildx_args(bucket_prefix, arch)
        if not push:
            cmd_args.append(f"--tag={image.repo}:{arch_tag}")
        cmd_args += [
            f"--metadata-file={metadata_path}",
            f"--build-arg=VERSION='{version.string}'",
            "--progress=plain",
            f"--file={dockerfile}",
            image.full_path,
        ]
        cmd = " ".join(cmd_args)
        logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd)

        if _stream_command(cmd) != 0:
            result.append((f"{image.repo}:{tag}-{arch}", "FAIL"))
            return result
        result.append((f"{image.repo}:{tag}-{arch}", "OK"))

        # buildx wrote the digest of the built image to the metadata file
        with open(metadata_path, "rb") as m:
            metadata = json.load(m)
        digests.append(metadata["containerimage.digest"])

    if not push:
        logging.info(
            "Merging is available only on push, separate %s images are created",
            f"{image.repo}:{tag}-$arch",
        )
        return result

    cmd = (
        "docker buildx imagetools create "
        f"--tag {image.repo}:{tag} {' '.join(digests)}"
    )
    logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd)
    if _stream_command(cmd) != 0:
        result.append((f"{image.repo}:{tag}", "FAIL"))

    return result
def main():
    """Entry point: build (and optionally push) the images, then report.

    Steps: parse arguments, resolve the release type and tags, build every
    OS flavour for every tag, upload the results, and, unless reports are
    disabled, post a commit status and insert the events into ClickHouse.
    """
    logging.basicConfig(level=logging.INFO)
    stopwatch = Stopwatch()
    makedirs(TEMP_PATH, exist_ok=True)

    args = parse_args()
    image = DockerImage(args.image_path, args.image_repo, False)
    args.release_type = auto_release_type(args.version, args.release_type)
    tags = gen_tags(args.version, args.release_type)
    check_name = f"Docker image {image.repo} building check (actions)"

    pr_info = None
    if CI:
        # In CI the packages always come from the builds bucket of this commit
        pr_info = PRInfo()
        release_or_pr = get_release_or_pr(pr_info, {"package_type": ""}, args.version)
        args.bucket_prefix = (
            f"https://s3.amazonaws.com/{S3_BUILDS_BUCKET}/"
            f"{release_or_pr}/{pr_info.sha}"
        )

    if args.push:
        subprocess.check_output(  # pylint: disable=unexpected-keyword-arg
            "docker login --username 'robotclickhouse' --password-stdin",
            input=get_parameter_from_ssm("dockerhub_robot_password"),
            encoding="utf-8",
            shell=True,
        )
        check_name = f"Docker image {image.repo} build and push (actions)"

    logging.info("Following tags will be created: %s", ", ".join(tags))
    status = "success"
    test_results = []  # type: List[Tuple[str, str]]
    for flavour in args.os:
        for tag in tags:
            outcome = build_and_push_image(
                image, args.push, args.bucket_prefix, flavour, tag, args.version
            )
            test_results.extend(outcome)
            # The last entry holds the most recent step; anything but OK
            # marks the whole check as failed.
            if test_results[-1][1] != "OK":
                status = "failure"

    pr_info = pr_info or PRInfo()
    s3_helper = S3Helper("https://s3.amazonaws.com")

    url = upload_results(
        s3_helper, pr_info.number, pr_info.sha, test_results, [], check_name
    )

    print(f"::notice ::Report url: {url}")
    print(f'::set-output name=url_output::"{url}"')

    if not args.reports:
        return

    description = f"Processed tags: {', '.join(tags)}"
    if len(description) >= 140:
        description = description[:136] + "..."

    gh = Github(get_best_robot_token())
    post_commit_status(gh, pr_info.sha, check_name, description, status, url)

    prepared_events = prepare_tests_results_for_clickhouse(
        pr_info,
        test_results,
        status,
        stopwatch.duration_seconds,
        stopwatch.start_time_str,
        url,
        check_name,
    )
    ch_helper = ClickHouseHelper()
    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
if __name__ == "__main__":
main()

View File

@ -2,12 +2,16 @@
import os
import unittest
from unittest.mock import patch
from unittest.mock import patch, MagicMock
from env_helper import GITHUB_RUN_URL
from pr_info import PRInfo
import docker_images_check as di
with patch("git_helper.Git"):
from version_helper import get_version_from_string, get_tagged_versions
import docker_server as ds
# di.logging.basicConfig(level=di.logging.INFO)
@ -221,5 +225,46 @@ class TestDockerImageCheck(unittest.TestCase):
self.assertEqual(results, expected)
class TestDockerServer(unittest.TestCase):
    """Tests for tag generation and release type detection in docker_server."""

    def test_gen_tags(self):
        """Each supported release type yields its tag list; 'auto' is rejected."""
        version = get_version_from_string("22.2.2.2")
        expected_tags = {
            "latest": ["latest", "22", "22.2", "22.2.2", "22.2.2.2"],
            "major": ["22", "22.2", "22.2.2", "22.2.2.2"],
            "minor": ["22.2", "22.2.2", "22.2.2.2"],
            "patch": ["22.2.2", "22.2.2.2"],
            "head": ["head"],
        }
        for release_type, expected in expected_tags.items():
            self.assertEqual(expected, ds.gen_tags(version, release_type))

        with self.assertRaises(ValueError):
            ds.gen_tags(version, "auto")

    @patch("docker_server.get_tagged_versions")
    def test_auto_release_type(self, mock_tagged_versions: MagicMock):
        """'auto' is resolved against a mocked list of tagged versions."""
        tagged = ("1.1.1.1", "1.2.1.1", "2.1.1.1", "2.2.1.1", "2.2.2.1")
        mock_tagged_versions.return_value = [
            get_version_from_string(tag) for tag in tagged
        ]

        cases = (
            ("1.0.1.1", "minor"),
            ("1.1.2.1", "minor"),
            ("1.3.1.1", "major"),
            ("2.1.2.1", "minor"),
            ("2.2.1.3", "patch"),
            ("2.2.3.1", "latest"),
            ("2.3.1.1", "latest"),
        )
        _ = get_tagged_versions()
        for version_string, expected in cases:
            release = ds.auto_release_type(
                get_version_from_string(version_string), "auto"
            )
            self.assertEqual(expected, release)
if __name__ == "__main__":
unittest.main()

View File

@ -1,7 +1,11 @@
import os
from os import path as p
module_dir = p.abspath(p.dirname(__file__))
git_root = p.abspath(p.join(module_dir, "..", ".."))
CI = bool(os.getenv("CI"))
TEMP_PATH = os.getenv("TEMP_PATH", os.path.abspath("."))
TEMP_PATH = os.getenv("TEMP_PATH", module_dir)
CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH)
CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN")
@ -9,11 +13,11 @@ GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH")
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0")
GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com")
GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))
GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", git_root)
GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
IMAGES_PATH = os.getenv("IMAGES_PATH")
REPORTS_PATH = os.getenv("REPORTS_PATH", "./reports")
REPO_COPY = os.getenv("REPO_COPY", os.path.abspath("../../"))
RUNNER_TEMP = os.getenv("RUNNER_TEMP", os.path.abspath("./tmp"))
REPORTS_PATH = os.getenv("REPORTS_PATH", p.abspath(p.join(module_dir, "./reports")))
REPO_COPY = os.getenv("REPO_COPY", git_root)
RUNNER_TEMP = os.getenv("RUNNER_TEMP", p.abspath(p.join(module_dir, "./tmp")))
S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds")
S3_TEST_REPORTS_BUCKET = os.getenv("S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports")

View File

@ -3,7 +3,7 @@ import argparse
import os.path as p
import re
import subprocess
from typing import Optional
from typing import List, Optional
# ^ and $ match subline in `multiple\nlines`
# \A and \Z match only start and end of the whole string
@ -89,7 +89,7 @@ class Git:
self.run(f"git rev-list {self.latest_tag}..HEAD --count")
)
def _check_tag(self, value: str):
def check_tag(self, value: str):
if value == "":
return
if not self._tag_pattern.match(value):
@ -101,7 +101,7 @@ class Git:
@latest_tag.setter
def latest_tag(self, value: str):
self._check_tag(value)
self.check_tag(value)
self._latest_tag = value
@property
@ -110,7 +110,7 @@ class Git:
@new_tag.setter
def new_tag(self, value: str):
self._check_tag(value)
self.check_tag(value)
self._new_tag = value
@property
@ -122,3 +122,6 @@ class Git:
version = self.latest_tag.split("-", maxsplit=1)[0]
return int(version.split(".")[-1]) + self.commits_since_tag
def get_tags(self) -> List[str]:
    """Return the tag names printed by `git tag`, split on whitespace."""
    listing = self.run("git tag")
    return listing.split()

View File

@ -1,10 +1,8 @@
#!/usr/bin/env python3
import datetime
import logging
import os.path as p
import subprocess
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from typing import Dict, Tuple, Union
from typing import Dict, List, Tuple, Union
from git_helper import Git, removeprefix
@ -49,12 +47,16 @@ class ClickHouseVersion:
patch: Union[int, str],
revision: Union[int, str],
git: Git,
tweak: str = None,
):
self._major = int(major)
self._minor = int(minor)
self._patch = int(patch)
self._revision = int(revision)
self._git = git
self._tweak = None
if tweak is not None:
self._tweak = int(tweak)
self._describe = ""
def update(self, part: str) -> "ClickHouseVersion":
@ -89,7 +91,7 @@ class ClickHouseVersion:
@property
def tweak(self) -> int:
    """Tweak part of the version.

    Returns the tweak passed to the constructor when there is one, otherwise
    the value reported by the Git helper.
    """
    # Compare with None explicitly: an explicit tweak of 0 is a valid value
    # and must not fall through to the git-derived one, as `or` would do.
    if self._tweak is not None:
        return self._tweak
    return self._git.tweak
@property
def revision(self) -> int:
@ -129,6 +131,25 @@ class ClickHouseVersion:
raise ValueError(f"version type {version_type} not in {VersionType.VALID}")
self._describe = f"v{self.string}-{version_type}"
def __eq__(self, other) -> bool:
    """Versions are equal when major, minor, patch and tweak all match.

    Returns NotImplemented for objects that are not versions, so Python can
    try the reflected comparison and finally fall back to identity.
    """
    # The check must be on `other`: testing `isinstance(self, type(other))`
    # lets any base-class instance (e.g. a plain `object()`) through and
    # then fails with AttributeError on `other.major`.
    if not isinstance(other, type(self)):
        return NotImplemented
    return (
        self.major == other.major
        and self.minor == other.minor
        and self.patch == other.patch
        and self.tweak == other.tweak
    )
def __lt__(self, other: "ClickHouseVersion") -> bool:
    """Order versions by major, minor, patch, tweak, in that priority."""
    for part in ("major", "minor", "patch", "tweak"):
        mine, theirs = getattr(self, part), getattr(other, part)
        # The first part that differs decides the ordering
        if mine != theirs:
            return mine < theirs

    # All parts are equal, so neither version is less than the other
    return False
class VersionType:
LTS = "lts"
@ -138,6 +159,14 @@ class VersionType:
VALID = (TESTING, PRESTABLE, STABLE, LTS)
def validate_version(version: str):
    """Check that `version` has four dot-separated integer parts.

    Raises ValueError when the number of parts is not 4 or when a part
    cannot be converted with `int()`. Returns nothing on success.
    """
    components = version.split(".")
    if len(components) != 4:
        raise ValueError(f"{version} does not contain 4 parts")
    # Conversion is done only for its ValueError on a non-integer component
    for component in components:
        int(component)
def get_abs_path(path: str) -> str:
return p.abspath(p.join(git.root, path))
@ -176,6 +205,29 @@ def get_version_from_repo(
)
def get_version_from_string(version: str) -> ClickHouseVersion:
    """Build a ClickHouseVersion from a `major.minor.patch.tweak` string.

    The string is checked with `validate_version` first. The revision is not
    part of the string and is passed as -1.
    """
    validate_version(version)
    # validate_version() guarantees exactly four parts
    major, minor, patch, tweak = version.split(".")
    return ClickHouseVersion(major, minor, patch, -1, git, tweak)
def get_version_from_tag(tag: str) -> ClickHouseVersion:
    """Build a ClickHouseVersion from a git tag.

    The tag is checked with `git.check_tag`; then the first character and
    everything from the first "-" on are dropped, and the rest is parsed as
    a version string.
    """
    git.check_tag(tag)
    version_string, _, _ = tag[1:].partition("-")
    return get_version_from_string(version_string)
def get_tagged_versions() -> List[ClickHouseVersion]:
    """Return the versions of all parsable git tags, sorted ascending.

    Tags for which `get_version_from_tag` raises are skipped.
    """
    parsed = []
    for tag in git.get_tags():
        try:
            parsed.append(get_version_from_tag(tag))
        except Exception:
            # Best effort: a tag that is not a version is simply ignored
            pass
    parsed.sort()
    return parsed
def update_cmake_version(
version: ClickHouseVersion,
versions_path: str = FILE_WITH_VERSION_PATH,
@ -185,22 +237,6 @@ def update_cmake_version(
f.write(VERSIONS_TEMPLATE.format_map(version.as_dict()))
def _update_changelog(repo_path: str, version: ClickHouseVersion):
cmd = """sed \
-e "s/[@]VERSION_STRING[@]/{version_str}/g" \
-e "s/[@]DATE[@]/{date}/g" \
-e "s/[@]AUTHOR[@]/clickhouse-release/g" \
-e "s/[@]EMAIL[@]/clickhouse-release@yandex-team.ru/g" \
< {in_path} > {changelog_path}
""".format(
version_str=version.string,
date=datetime.datetime.now().strftime("%a, %d %b %Y %H:%M:%S") + " +0300",
in_path=p.join(repo_path, CHANGELOG_IN_PATH),
changelog_path=p.join(repo_path, CHANGELOG_PATH),
)
subprocess.check_call(cmd, shell=True)
def update_contributors(
relative_contributors_path: str = GENERATED_CONTRIBUTORS, force: bool = False
):
@ -225,22 +261,10 @@ def update_contributors(
cfd.write(content)
def _update_dockerfile(repo_path: str, version: ClickHouseVersion):
version_str_for_docker = ".".join(
[str(version.major), str(version.minor), str(version.patch), "*"]
)
cmd = "ls -1 {path}/docker/*/Dockerfile | xargs sed -i -r -e 's/ARG version=.+$/ARG version='{ver}'/'".format(
path=repo_path, ver=version_str_for_docker
)
subprocess.check_call(cmd, shell=True)
def update_version_local(repo_path, version, version_type="testing"):
def update_version_local(version, version_type="testing"):
update_contributors()
version.with_description(version_type)
update_cmake_version(version)
_update_changelog(repo_path, version)
_update_dockerfile(repo_path, version)
def main():

View File

@ -22,5 +22,11 @@
</header>
</headers>
</url_with_headers>
<s3_conn>
<url>http://localhost:11111/test/</url>
<access_key_id>test</access_key_id>
<secret_access_key>testtest</secret_access_key>
<structure>auto</structure>
</s3_conn>
</named_collections>
</clickhouse>

View File

@ -44,7 +44,7 @@ def dotnet_container():
"-f",
docker_compose,
"up",
"--no-recreate",
"--force-recreate",
"-d",
"--no-build",
]

View File

@ -55,7 +55,7 @@ def golang_container():
"-f",
docker_compose,
"up",
"--no-recreate",
"--force-recreate",
"-d",
"--no-build",
]
@ -82,7 +82,7 @@ def php_container():
"-f",
docker_compose,
"up",
"--no-recreate",
"--force-recreate",
"-d",
"--no-build",
]
@ -109,7 +109,7 @@ def nodejs_container():
"-f",
docker_compose,
"up",
"--no-recreate",
"--force-recreate",
"-d",
"--no-build",
]
@ -136,7 +136,7 @@ def java_container():
"-f",
docker_compose,
"up",
"--no-recreate",
"--force-recreate",
"-d",
"--no-build",
]

View File

@ -56,7 +56,7 @@ def psql_client():
"-f",
docker_compose,
"up",
"--no-recreate",
"--force-recreate",
"-d",
"--build",
]
@ -99,7 +99,7 @@ def java_container():
"-f",
docker_compose,
"up",
"--no-recreate",
"--force-recreate",
"-d",
"--build",
]

View File

@ -129,6 +129,7 @@ def test_quota_from_users_xml():
1000,
"\\N",
"\\N",
"\\N",
]
]
)
@ -349,6 +350,7 @@ def test_tracking_quota():
"\\N",
"\\N",
"\\N",
"\\N",
]
]
)
@ -454,7 +456,7 @@ def test_exceed_quota():
]
)
system_quota_limits(
[["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]
[["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N"]]
)
system_quota_usage(
[
@ -545,6 +547,7 @@ def test_exceed_quota():
1000,
"\\N",
"\\N",
"\\N",
]
]
)
@ -634,6 +637,7 @@ def test_add_remove_interval():
1000,
"\\N",
"\\N",
"\\N",
]
]
)
@ -695,6 +699,7 @@ def test_add_remove_interval():
1000,
"\\N",
"\\N",
"\\N",
],
[
"myQuota",
@ -709,6 +714,7 @@ def test_add_remove_interval():
"\\N",
20000,
120,
"\\N",
],
]
)
@ -842,6 +848,7 @@ def test_add_remove_interval():
1000,
"\\N",
"\\N",
"\\N",
]
]
)
@ -1003,6 +1010,7 @@ def test_add_remove_interval():
1000,
"\\N",
"\\N",
"\\N",
]
]
)
@ -1064,6 +1072,7 @@ def test_add_remove_quota():
1000,
"\\N",
"\\N",
"\\N",
]
]
)
@ -1136,6 +1145,7 @@ def test_add_remove_quota():
1000,
"\\N",
"\\N",
"\\N",
],
[
"myQuota2",
@ -1150,6 +1160,7 @@ def test_add_remove_quota():
4000,
400000,
60,
"\\N",
],
[
"myQuota2",
@ -1164,6 +1175,7 @@ def test_add_remove_quota():
"\\N",
"\\N",
1800,
"\\N",
],
]
)
@ -1226,6 +1238,7 @@ def test_add_remove_quota():
1000,
"\\N",
"\\N",
"\\N",
]
]
)
@ -1294,6 +1307,7 @@ def test_add_remove_quota():
1000,
"\\N",
"\\N",
"\\N",
]
]
)
@ -1356,6 +1370,7 @@ def test_reload_users_xml_by_timer():
1000,
"\\N",
"\\N",
"\\N",
]
]
)
@ -1382,7 +1397,7 @@ def test_reload_users_xml_by_timer():
assert_eq_with_retry(
instance,
"SELECT * FROM system.quota_limits",
[["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]],
[["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N"]],
)
@ -1481,15 +1496,15 @@ def test_dcl_management():
== "CREATE QUOTA qA FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default\n"
)
assert re.match(
"qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n"
"qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
"qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\n"
"qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n",
instance.query("SHOW QUOTA"),
)
instance.query("SELECT * from test_table")
assert re.match(
"qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n"
"qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
"qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\n"
"qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n",
instance.query("SHOW QUOTA"),
)
@ -1503,7 +1518,7 @@ def test_dcl_management():
instance.query("SELECT * from test_table")
assert re.match(
"qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
"qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n",
instance.query("SHOW QUOTA"),
)
@ -1519,7 +1534,7 @@ def test_dcl_management():
instance.query("SELECT * from test_table")
assert re.match(
"qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
"qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n",
instance.query("SHOW QUOTA"),
)
@ -1563,6 +1578,7 @@ def test_query_inserts():
1000,
"\\N",
"\\N",
"\\N",
]
]
)

View File

@ -554,6 +554,26 @@ def test_insert_select_schema_inference(started_cluster):
assert int(result) == 1
def test_virtual_columns_2(started_cluster):
    """`_path` is the virtual column unless the table declares its own `_path`."""
    hdfs_api = started_cluster.hdfs_api

    # (table function, value expected from `SELECT _path`)
    scenarios = (
        # No `_path` in the structure: the virtual column holds the file URI
        (
            "hdfs('hdfs://hdfs1:9000/parquet_2', 'Parquet', 'a Int32, b String')",
            "hdfs://hdfs1:9000/parquet_2",
        ),
        # A real `_path` column takes precedence over the virtual one
        (
            "hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')",
            "kek",
        ),
    )
    for table_function, expected_path in scenarios:
        node1.query(f"insert into table function {table_function} SELECT 1, 'kek'")
        result = node1.query(f"SELECT _path FROM {table_function}")
        assert result.strip() == expected_path
if __name__ == "__main__":
cluster.start()
input("Cluster created, press any key to destroy...")

Some files were not shown because too many files have changed in this diff Show More