Mirror of https://github.com/ClickHouse/ClickHouse.git
Merge branch 'master' into test/crash_35551
Commit: 4d9bb9f6fd
@ -16,7 +16,6 @@ Checks: '-*,
|
||||
modernize-make-unique,
|
||||
modernize-raw-string-literal,
|
||||
modernize-redundant-void-arg,
|
||||
modernize-replace-auto-ptr,
|
||||
modernize-replace-random-shuffle,
|
||||
modernize-use-bool-literals,
|
||||
modernize-use-nullptr,
|
||||
|
28
.github/workflows/master.yml
vendored
@ -947,6 +947,34 @@ jobs:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
DockerServerImages:
|
||||
needs:
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no version info
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head
|
||||
python3 docker_server.py --release-type head --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
BuilderReport:
|
||||
|
31
.github/workflows/pull_request.yml
vendored
@ -4,7 +4,7 @@ env:
|
||||
# Force the stdout and stderr streams to be unbuffered
|
||||
PYTHONUNBUFFERED: 1
|
||||
|
||||
on: # yamllint disable-line rule:truthy
|
||||
on: # yamllint disable-line rule:truthy
|
||||
pull_request:
|
||||
types:
|
||||
- synchronize
|
||||
@ -998,6 +998,34 @@ jobs:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
DockerServerImages:
|
||||
needs:
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no version info
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head --no-push
|
||||
python3 docker_server.py --release-type head --no-push --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
BuilderReport:
|
||||
@ -3138,6 +3166,7 @@ jobs:
|
||||
needs:
|
||||
- StyleCheck
|
||||
- DockerHubPush
|
||||
- DockerServerImages
|
||||
- CheckLabels
|
||||
- BuilderReport
|
||||
- FastTest
|
||||
|
25
.github/workflows/release.yml
vendored
@ -36,3 +36,28 @@ jobs:
|
||||
overwrite: true
|
||||
tag: ${{ github.ref }}
|
||||
file_glob: true
|
||||
############################################################################################
|
||||
##################################### Docker images #######################################
|
||||
############################################################################################
|
||||
DockerServerImages:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # otherwise we will have no version info
|
||||
- name: Check docker clickhouse/clickhouse-server building
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type auto
|
||||
python3 docker_server.py --release-type auto --no-ubuntu \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
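The new DockerServerImages job drives the same tests/ci/docker_server.py helper in all three workflows, only varying the release type: `head` on master, `head --no-push` on pull requests, and `auto` on releases. A minimal local sketch of the pull-request variant follows; it assumes a ClickHouse checkout with GITHUB_WORKSPACE pointing at it, and keeps images local via --no-push.

    # Sketch only: reproduce the pull_request DockerServerImages step locally.
    # Assumes $GITHUB_WORKSPACE points at a ClickHouse checkout with tests/ci present.
    cd "$GITHUB_WORKSPACE/tests/ci"
    python3 docker_server.py --release-type head --no-push
    python3 docker_server.py --release-type head --no-push --no-ubuntu \
        --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper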
|
||||
|
@ -1,12 +1,9 @@
|
||||
# During cross-compilation in our CI we need llvm-tblgen and the other build tools
# to be built for the host architecture, and everything else for the target architecture (e.g. AArch64).
|
||||
# Possible workaround is to use llvm-tblgen from some package...
|
||||
# But let's just enable LLVM for native builds
|
||||
if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined")
|
||||
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
|
||||
if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
|
||||
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
|
||||
else()
|
||||
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
|
||||
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
|
||||
endif()
|
||||
|
||||
option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
|
||||
|
||||
if (NOT ENABLE_EMBEDDED_COMPILER)
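With the new condition, ENABLE_EMBEDDED_COMPILER now defaults to OFF on macOS, on non-x86_64 targets, and under the undefined sanitizer, instead of only under cross-compilation. It remains an ordinary CMake option, so it can still be forced at configure time; a hedged sketch (the build directory name is an arbitrary choice, not part of the diff):

    # Sketch: override the computed default when configuring a native build.
    mkdir -p build && cd build
    cmake -DENABLE_EMBEDDED_COMPILER=ON ..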
|
||||
|
2
debian/clickhouse-server.service
vendored
@ -20,7 +20,7 @@ ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml
|
||||
EnvironmentFile=-/etc/default/clickhouse
|
||||
LimitCORE=infinity
|
||||
LimitNOFILE=500000
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
|
||||
|
||||
[Install]
|
||||
# ClickHouse should not start from the rescue shell (rescue.target).
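Adding CAP_NET_BIND_SERVICE to the unit's CapabilityBoundingSet lets clickhouse-server bind privileged ports (below 1024) without running as root. A hedged way to pick up the change and confirm the bounding set on a systemd host with this unit installed:

    # Sketch: reload the edited unit and inspect its capability bounding set.
    sudo systemctl daemon-reload
    sudo systemctl restart clickhouse-server
    systemctl show clickhouse-server -p CapabilityBoundingSet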
|
||||
|
72
docker/keeper/Dockerfile
Normal file
@ -0,0 +1,72 @@
|
||||
FROM ubuntu:20.04 AS glibc-donor
|
||||
|
||||
ARG TARGETARCH
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& case $arch in \
|
||||
amd64) rarch=x86_64 ;; \
|
||||
arm64) rarch=aarch64 ;; \
|
||||
esac \
|
||||
&& ln -s "${rarch}-linux-gnu" /lib/linux-gnu
|
||||
|
||||
|
||||
FROM alpine
|
||||
|
||||
ENV LANG=en_US.UTF-8 \
|
||||
LANGUAGE=en_US:en \
|
||||
LC_ALL=en_US.UTF-8 \
|
||||
TZ=UTC \
|
||||
CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml
|
||||
|
||||
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
|
||||
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& case $arch in \
|
||||
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
|
||||
arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
|
||||
esac
|
||||
|
||||
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
|
||||
ARG VERSION="22.4.1.917"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
# It is especially important for rootless containers: in that case entrypoint
|
||||
# can't do chown and owners of mounted volumes should be configured externally.
|
||||
# We do that in advance at the beginning of the Dockerfile, before any packages are
# installed, to prevent some unrelated software from picking up those uid / gid.
|
||||
# The same uid / gid (101) is used both for alpine and ubuntu.
|
||||
|
||||
|
||||
ARG TARGETARCH
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& for package in ${PACKAGES}; do \
|
||||
{ \
|
||||
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
|
||||
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
|
||||
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
|
||||
} || \
|
||||
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
|
||||
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
|
||||
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
|
||||
} ; \
|
||||
} || exit 1 \
|
||||
; done \
|
||||
&& rm /tmp/*.tgz /install -r \
|
||||
&& addgroup -S -g 101 clickhouse \
|
||||
&& adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse keeper" -u 101 clickhouse \
|
||||
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper \
|
||||
&& chown clickhouse:clickhouse /var/lib/clickhouse \
|
||||
&& chown root:clickhouse /var/log/clickhouse-keeper \
|
||||
&& chmod +x /entrypoint.sh \
|
||||
&& apk add --no-cache su-exec bash tzdata \
|
||||
&& cp /usr/share/zoneinfo/UTC /etc/localtime \
|
||||
&& echo "UTC" > /etc/timezone \
|
||||
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper
|
||||
|
||||
|
||||
EXPOSE 2181 10181 44444
|
||||
|
||||
VOLUME /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
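The RUN layer above downloads each package as an architecture-qualified tarball and, if that is missing, falls back to the unqualified name (note the different --strip-components depth for the two layouts). A standalone sketch of that pattern, meant to run as root inside an image build; REPOSITORY, VERSION and the arch value are taken from the ARGs above and are placeholders for whatever build is actually targeted:

    # Sketch of the download-with-fallback used by the RUN layer above.
    fetch_package() {
        local package="$1" repo="$2" version="$3" arch="$4"
        if wget -c -q "${repo}/${package}-${version}-${arch}.tgz" -O "/tmp/${package}.tgz"; then
            # Arch-qualified tarball: contents are one directory level deep.
            tar xzf "/tmp/${package}.tgz" --strip-components=1 -C /
        else
            # Fall back to the unqualified tarball, which nests one level deeper.
            wget -c -q "${repo}/${package}-${version}.tgz" -O "/tmp/${package}.tgz"
            tar xzf "/tmp/${package}.tgz" --strip-components=2 -C /
        fi
    }
    fetch_package clickhouse-keeper \
        "https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" \
        "22.4.1.917" amd64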
|
1
docker/keeper/Dockerfile.alpine
Symbolic link
@ -0,0 +1 @@
|
||||
Dockerfile
|
93
docker/keeper/entrypoint.sh
Normal file
@ -0,0 +1,93 @@
|
||||
#!/bin/bash
|
||||
|
||||
set +x
|
||||
set -eo pipefail
|
||||
shopt -s nullglob
|
||||
|
||||
DO_CHOWN=1
|
||||
if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then
|
||||
DO_CHOWN=0
|
||||
fi
|
||||
|
||||
CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
|
||||
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
|
||||
|
||||
# support --user
|
||||
if [ "$(id -u)" = "0" ]; then
|
||||
USER=$CLICKHOUSE_UID
|
||||
GROUP=$CLICKHOUSE_GID
|
||||
if command -v gosu &> /dev/null; then
|
||||
gosu="gosu $USER:$GROUP"
|
||||
elif command -v su-exec &> /dev/null; then
|
||||
gosu="su-exec $USER:$GROUP"
|
||||
else
|
||||
echo "No gosu/su-exec detected!"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
USER="$(id -u)"
|
||||
GROUP="$(id -g)"
|
||||
gosu=""
|
||||
DO_CHOWN=0
|
||||
fi
|
||||
|
||||
KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/config.yaml}"
|
||||
|
||||
if [ -f "$KEEPER_CONFIG" ] && ! $gosu test -f "$KEEPER_CONFIG" -a -r "$KEEPER_CONFIG"; then
|
||||
echo "Configuration file '$KEEPER_CONFIG' isn't readable by user with id '$USER'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"
|
||||
LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}"
|
||||
LOG_PATH="${LOG_DIR}/clickhouse-keeper.log"
|
||||
ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log"
|
||||
COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log"
|
||||
COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots"
|
||||
CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0}
|
||||
|
||||
for dir in "$DATA_DIR" \
|
||||
"$LOG_DIR" \
|
||||
"$TMP_DIR" \
|
||||
"$COORDINATION_LOG_DIR" \
|
||||
"$COORDINATION_SNAPSHOT_DIR"
|
||||
do
|
||||
# check if variable not empty
|
||||
[ -z "$dir" ] && continue
|
||||
# ensure directories exist
|
||||
if ! mkdir -p "$dir"; then
|
||||
echo "Couldn't create necessary directory: $dir"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$DO_CHOWN" = "1" ]; then
|
||||
# ensure proper directories permissions
|
||||
# but skip it if the directory already has proper permissions, because recursive chown may be slow
|
||||
if [ "$(stat -c %u "$dir")" != "$USER" ] || [ "$(stat -c %g "$dir")" != "$GROUP" ]; then
|
||||
chown -R "$USER:$GROUP" "$dir"
|
||||
fi
|
||||
elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then
|
||||
echo "Necessary directory '$dir' isn't accessible by user with id '$USER'"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# if no args are passed to `docker run`, or the first argument starts with `--`, then the user is passing clickhouse-keeper arguments
|
||||
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
|
||||
# Watchdog is launched by default, but does not send SIGINT to the main process,
|
||||
# so the container can't be stopped with Ctrl+C
|
||||
export CLICKHOUSE_WATCHDOG_ENABLE
|
||||
|
||||
cd /var/lib/clickhouse
|
||||
|
||||
# There is a config file. It has already been tested with gosu above (whether it is readable by the keeper user)
|
||||
if [ -f "$KEEPER_CONFIG" ]; then
|
||||
exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@"
|
||||
fi
|
||||
|
||||
# There is no config file. Will use embedded one
|
||||
exec $gosu /usr/bin/clickhouse-keeper --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@"
|
||||
fi
|
||||
|
||||
# Otherwise, we assume the user wants to run their own process, for example a `bash` shell to explore this image
|
||||
exec "$@"
|
2
docker/server/.gitignore
vendored
@ -1,2 +0,0 @@
|
||||
alpine-root/*
|
||||
tgz-packages/*
|
@ -1,122 +0,0 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
# ARG for quick switch to a given ubuntu mirror
|
||||
ARG apt_archive="http://archive.ubuntu.com"
|
||||
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
|
||||
|
||||
ARG repository="deb https://packages.clickhouse.com/deb stable main"
|
||||
ARG version=22.1.1.*
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
# from debs created by CI build, for example:
|
||||
# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
|
||||
ARG deb_location_url=""
|
||||
|
||||
# set non-empty single_binary_location_url to create docker image
|
||||
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
|
||||
# for example (run on aarch64 server):
|
||||
# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
|
||||
# note: clickhouse-odbc-bridge is not supported there.
|
||||
ARG single_binary_location_url=""
|
||||
|
||||
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
# It is especially important for rootless containers: in that case entrypoint
|
||||
# can't do chown and owners of mounted volumes should be configured externally.
|
||||
# We do that in advance at the beginning of the Dockerfile, before any packages are
# installed, to prevent some unrelated software from picking up those uid / gid.
|
||||
# The same uid / gid (101) is used both for alpine and ubuntu.
|
||||
|
||||
# To drop privileges, we need 'su' command, that simply changes uid and gid.
|
||||
# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux:
|
||||
# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking
|
||||
# It has to mitigate this drawback of Linux, and to do this, the 'su' command creates its own pseudo-terminal
# and forwards commands. Due to some ridiculous circumstances, it does not work in Docker (or it does)
|
||||
# and for these reasons people are using alternatives to the 'su' command in Docker,
|
||||
# that don't mess with the terminal, don't care about closing the opened files, etc...
|
||||
# but can only be safe to drop privileges inside Docker.
|
||||
# The question - what implementation of 'su' command to use.
|
||||
# It should be a simple script doing about just two syscalls.
|
||||
# Some people tend to use 'gosu' tool that is written in Go.
|
||||
# It is not used for several reasons:
|
||||
# 1. Dependency on some foreign code in yet another programming language - does not sound alright.
|
||||
# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners.
|
||||
|
||||
COPY su-exec.c /su-exec.c
|
||||
|
||||
RUN groupadd -r clickhouse --gid=101 \
|
||||
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
|
||||
&& apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
apt-transport-https \
|
||||
ca-certificates \
|
||||
dirmngr \
|
||||
gnupg \
|
||||
locales \
|
||||
wget \
|
||||
tzdata \
|
||||
&& mkdir -p /etc/apt/sources.list.d \
|
||||
&& apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \
|
||||
&& echo $repository > /etc/apt/sources.list.d/clickhouse.list \
|
||||
&& if [ -n "$deb_location_url" ]; then \
|
||||
echo "installing from custom url with deb packages: $deb_location_url" \
|
||||
rm -rf /tmp/clickhouse_debs \
|
||||
&& mkdir -p /tmp/clickhouse_debs \
|
||||
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-common-static_${version}_amd64.deb" -P /tmp/clickhouse_debs \
|
||||
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-client_${version}_all.deb" -P /tmp/clickhouse_debs \
|
||||
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-server_${version}_all.deb" -P /tmp/clickhouse_debs \
|
||||
&& dpkg -i /tmp/clickhouse_debs/*.deb ; \
|
||||
elif [ -n "$single_binary_location_url" ]; then \
|
||||
echo "installing from single binary url: $single_binary_location_url" \
|
||||
&& rm -rf /tmp/clickhouse_binary \
|
||||
&& mkdir -p /tmp/clickhouse_binary \
|
||||
&& wget --progress=bar:force:noscroll "$single_binary_location_url" -O /tmp/clickhouse_binary/clickhouse \
|
||||
&& chmod +x /tmp/clickhouse_binary/clickhouse \
|
||||
&& /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \
|
||||
else \
|
||||
echo "installing from repository: $repository" \
|
||||
&& apt-get update \
|
||||
&& apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
|
||||
&& apt-get install --allow-unauthenticated --yes --no-install-recommends \
|
||||
clickhouse-common-static=$version \
|
||||
clickhouse-client=$version \
|
||||
clickhouse-server=$version ; \
|
||||
fi \
|
||||
&& apt-get install -y --no-install-recommends tcc libc-dev && \
|
||||
tcc /su-exec.c -o /bin/su-exec && \
|
||||
chown root:root /bin/su-exec && \
|
||||
chmod 0755 /bin/su-exec && \
|
||||
rm /su-exec.c && \
|
||||
apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \
|
||||
&& clickhouse-local -q 'SELECT * FROM system.build_options' \
|
||||
&& rm -rf \
|
||||
/var/lib/apt/lists/* \
|
||||
/var/cache/debconf \
|
||||
/tmp/* \
|
||||
&& apt-get clean \
|
||||
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
|
||||
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
|
||||
|
||||
# we need to allow "others" access to clickhouse folder, because docker container
|
||||
# can be started with arbitrary uid (openshift usecase)
|
||||
|
||||
RUN locale-gen en_US.UTF-8
|
||||
ENV LANG en_US.UTF-8
|
||||
ENV LANGUAGE en_US:en
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
ENV TZ UTC
|
||||
|
||||
RUN mkdir /docker-entrypoint-initdb.d
|
||||
|
||||
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh
|
||||
|
||||
EXPOSE 9000 8123 9009
|
||||
VOLUME /var/lib/clickhouse
|
||||
|
||||
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
1
docker/server/Dockerfile
Symbolic link
@ -0,0 +1 @@
|
||||
Dockerfile.ubuntu
|
@ -1,3 +1,14 @@
|
||||
FROM ubuntu:20.04 AS glibc-donor
|
||||
ARG TARGETARCH
|
||||
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& case $arch in \
|
||||
amd64) rarch=x86_64 ;; \
|
||||
arm64) rarch=aarch64 ;; \
|
||||
esac \
|
||||
&& ln -s "${rarch}-linux-gnu" /lib/linux-gnu
|
||||
|
||||
|
||||
FROM alpine
|
||||
|
||||
ENV LANG=en_US.UTF-8 \
|
||||
@ -6,7 +17,24 @@ ENV LANG=en_US.UTF-8 \
|
||||
TZ=UTC \
|
||||
CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml
|
||||
|
||||
COPY alpine-root/ /
|
||||
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
|
||||
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
|
||||
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
|
||||
ARG TARGETARCH
|
||||
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& case $arch in \
|
||||
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
|
||||
arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
|
||||
esac
|
||||
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="20.9.3.45"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
# It is especially important for rootless containers: in that case entrypoint
|
||||
@ -15,9 +43,23 @@ COPY alpine-root/ /
|
||||
# installed to prevent picking those uid / gid by some unrelated software.
|
||||
# The same uid / gid (101) is used both for alpine and ubuntu.
|
||||
|
||||
RUN addgroup -S -g 101 clickhouse \
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& for package in ${PACKAGES}; do \
|
||||
{ \
|
||||
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
|
||||
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
|
||||
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
|
||||
} || \
|
||||
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
|
||||
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
|
||||
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
|
||||
} ; \
|
||||
} || exit 1 \
|
||||
; done \
|
||||
&& rm /tmp/*.tgz /install -r \
|
||||
&& addgroup -S -g 101 clickhouse \
|
||||
&& adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse server" -u 101 clickhouse \
|
||||
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
|
||||
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server/config.d /etc/clickhouse-server/users.d /etc/clickhouse-client /docker-entrypoint-initdb.d \
|
||||
&& chown clickhouse:clickhouse /var/lib/clickhouse \
|
||||
&& chown root:clickhouse /var/log/clickhouse-server \
|
||||
&& chmod +x /entrypoint.sh \
|
||||
|
128
docker/server/Dockerfile.ubuntu
Normal file
@ -0,0 +1,128 @@
|
||||
FROM ubuntu:20.04
|
||||
|
||||
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
COPY su-exec.c /su-exec.c
|
||||
|
||||
# ARG for quick switch to a given ubuntu mirror
|
||||
ARG apt_archive="http://archive.ubuntu.com"
|
||||
RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \
|
||||
&& groupadd -r clickhouse --gid=101 \
|
||||
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
|
||||
&& apt-get update \
|
||||
&& apt-get install --yes --no-install-recommends \
|
||||
apt-transport-https \
|
||||
ca-certificates \
|
||||
dirmngr \
|
||||
gnupg \
|
||||
locales \
|
||||
wget \
|
||||
tzdata \
|
||||
&& apt-get install -y --no-install-recommends tcc libc-dev && \
|
||||
tcc /su-exec.c -o /bin/su-exec && \
|
||||
chown root:root /bin/su-exec && \
|
||||
chmod 0755 /bin/su-exec && \
|
||||
rm /su-exec.c && \
|
||||
apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \
|
||||
&& apt-get clean
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION=22.1.1.*
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
# from debs created by CI build, for example:
|
||||
# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
|
||||
ARG deb_location_url=""
|
||||
|
||||
# set non-empty single_binary_location_url to create docker image
|
||||
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
|
||||
# for example (run on aarch64 server):
|
||||
# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
|
||||
# note: clickhouse-odbc-bridge is not supported there.
|
||||
ARG single_binary_location_url=""
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
# It is especially important for rootless containers: in that case entrypoint
|
||||
# can't do chown and owners of mounted volumes should be configured externally.
|
||||
# We do that in advance at the beginning of the Dockerfile, before any packages are
# installed, to prevent some unrelated software from picking up those uid / gid.
|
||||
# The same uid / gid (101) is used both for alpine and ubuntu.
|
||||
|
||||
# To drop privileges, we need 'su' command, that simply changes uid and gid.
|
||||
# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux:
|
||||
# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking
|
||||
# It has to mitigate this drawback of Linux, and to do this, the 'su' command creates its own pseudo-terminal
# and forwards commands. Due to some ridiculous circumstances, it does not work in Docker (or it does)
|
||||
# and for these reasons people are using alternatives to the 'su' command in Docker,
|
||||
# that don't mess with the terminal, don't care about closing the opened files, etc...
|
||||
# but can only be safe to drop privileges inside Docker.
|
||||
# The question - what implementation of 'su' command to use.
|
||||
# It should be a simple script doing about just two syscalls.
|
||||
# Some people tend to use 'gosu' tool that is written in Go.
|
||||
# It is not used for several reasons:
|
||||
# 1. Dependency on some foreign code in yet another programming language - does not sound alright.
|
||||
# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners.
|
||||
|
||||
ARG TARGETARCH
|
||||
|
||||
RUN arch=${TARGETARCH:-amd64} \
|
||||
&& if [ -n "${deb_location_url}" ]; then \
|
||||
echo "installing from custom url with deb packages: ${deb_location_url}" \
|
||||
rm -rf /tmp/clickhouse_debs \
|
||||
&& mkdir -p /tmp/clickhouse_debs \
|
||||
&& for package in ${PACKAGES}; do \
|
||||
{ wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_${arch}.deb" -P /tmp/clickhouse_debs || \
|
||||
wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_all.deb" -P /tmp/clickhouse_debs ; } \
|
||||
|| exit 1 \
|
||||
; done \
|
||||
&& dpkg -i /tmp/clickhouse_debs/*.deb ; \
|
||||
elif [ -n "${single_binary_location_url}" ]; then \
|
||||
echo "installing from single binary url: ${single_binary_location_url}" \
|
||||
&& rm -rf /tmp/clickhouse_binary \
|
||||
&& mkdir -p /tmp/clickhouse_binary \
|
||||
&& wget --progress=bar:force:noscroll "${single_binary_location_url}" -O /tmp/clickhouse_binary/clickhouse \
|
||||
&& chmod +x /tmp/clickhouse_binary/clickhouse \
|
||||
&& /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \
|
||||
else \
|
||||
mkdir -p /etc/apt/sources.list.d \
|
||||
&& apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \
|
||||
&& echo ${REPOSITORY} > /etc/apt/sources.list.d/clickhouse.list \
|
||||
&& echo "installing from repository: ${REPOSITORY}" \
|
||||
&& apt-get update \
|
||||
&& apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
|
||||
&& for package in ${PACKAGES}; do \
|
||||
apt-get install --allow-unauthenticated --yes --no-install-recommends "${package}=${VERSION}" || exit 1 \
|
||||
; done \
|
||||
; fi \
|
||||
&& clickhouse-local -q 'SELECT * FROM system.build_options' \
|
||||
&& rm -rf \
|
||||
/var/lib/apt/lists/* \
|
||||
/var/cache/debconf \
|
||||
/tmp/* \
|
||||
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
|
||||
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
|
||||
|
||||
# we need to allow "others" access to clickhouse folder, because docker container
|
||||
# can be started with arbitrary uid (openshift usecase)
|
||||
|
||||
RUN locale-gen en_US.UTF-8
|
||||
ENV LANG en_US.UTF-8
|
||||
ENV LANGUAGE en_US:en
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
ENV TZ UTC
|
||||
|
||||
RUN mkdir /docker-entrypoint-initdb.d
|
||||
|
||||
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh
|
||||
|
||||
EXPOSE 9000 8123 9009
|
||||
VOLUME /var/lib/clickhouse
|
||||
|
||||
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
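The image still compiles the tiny su-exec helper with tcc instead of pulling in gosu, as the comments above explain; su-exec only changes uid/gid and execs the target. A hedged sketch of the privilege drop a server entrypoint performs with it (paths follow the ENV and config values in this Dockerfile):

    # Sketch: drop from root to the clickhouse user (uid/gid 101) and start the server.
    exec su-exec clickhouse:clickhouse /usr/bin/clickhouse-server \
        --config-file=/etc/clickhouse-server/config.xml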
|
@ -1,63 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc
|
||||
REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}"
|
||||
VERSION="${VERSION:-20.9.3.45}"
|
||||
DOCKER_IMAGE="${DOCKER_IMAGE:-clickhouse/clickhouse-server}"
|
||||
|
||||
# where original files live
|
||||
DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
|
||||
|
||||
# we will create root for our image here
|
||||
CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root"
|
||||
|
||||
# clean up the root from old runs, it's reconstructed each time
|
||||
rm -rf "$CONTAINER_ROOT_FOLDER"
|
||||
mkdir -p "$CONTAINER_ROOT_FOLDER"
|
||||
|
||||
# where to put downloaded tgz
|
||||
TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages"
|
||||
mkdir -p "$TGZ_PACKAGES_FOLDER"
|
||||
|
||||
PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
|
||||
|
||||
# download tars from the repo
|
||||
for package in "${PACKAGES[@]}"
|
||||
do
|
||||
wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
|
||||
done
|
||||
|
||||
# unpack tars
|
||||
for package in "${PACKAGES[@]}"
|
||||
do
|
||||
tar xvzf "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" --strip-components=2 -C "$CONTAINER_ROOT_FOLDER"
|
||||
done
|
||||
|
||||
# prepare few more folders
|
||||
mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \
|
||||
"${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d" \
|
||||
"${CONTAINER_ROOT_FOLDER}/var/log/clickhouse-server" \
|
||||
"${CONTAINER_ROOT_FOLDER}/var/lib/clickhouse" \
|
||||
"${CONTAINER_ROOT_FOLDER}/docker-entrypoint-initdb.d" \
|
||||
"${CONTAINER_ROOT_FOLDER}/lib64"
|
||||
|
||||
cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/"
|
||||
cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
|
||||
|
||||
## get glibc components from ubuntu 20.04 and put them to expected place
|
||||
docker pull ubuntu:20.04
|
||||
ubuntu20image=$(docker create --rm ubuntu:20.04)
|
||||
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
|
||||
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
|
||||
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
|
||||
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib"
|
||||
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib"
|
||||
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
|
||||
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
|
||||
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
|
||||
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
|
||||
docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc"
|
||||
|
||||
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
|
||||
rm -rf "$CONTAINER_ROOT_FOLDER"
|
@ -1,47 +0,0 @@
|
||||
# Since right now we can't mount volumes into docker during build, we split building the container into stages:
# 1. build the base container
# 2. run the base container with mounted volumes
|
||||
# 3. commit container as image
|
||||
# 4. build final container atop that image
|
||||
# Middle steps are performed by the bash script.
|
||||
|
||||
FROM ubuntu:18.04 as clickhouse-server-base
|
||||
ARG gosu_ver=1.14
|
||||
|
||||
VOLUME /packages/
|
||||
|
||||
# update to allow installing dependencies of clickhouse automatically
|
||||
RUN apt update; \
|
||||
DEBIAN_FRONTEND=noninteractive \
|
||||
apt install -y locales;
|
||||
|
||||
ADD https://github.com/tianon/gosu/releases/download/${gosu_ver}/gosu-amd64 /bin/gosu
|
||||
|
||||
RUN locale-gen en_US.UTF-8
|
||||
ENV LANG en_US.UTF-8
|
||||
ENV LANGUAGE en_US:en
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
|
||||
# installing via apt to simulate the real-world scenario, where the user installs the deb package and all its dependencies automatically.
|
||||
CMD DEBIAN_FRONTEND=noninteractive \
|
||||
apt install -y \
|
||||
/packages/clickhouse-common-static_*.deb \
|
||||
/packages/clickhouse-server_*.deb ;
|
||||
|
||||
FROM clickhouse-server-base:postinstall as clickhouse-server
|
||||
|
||||
RUN mkdir /docker-entrypoint-initdb.d
|
||||
|
||||
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
|
||||
RUN chmod +x \
|
||||
/entrypoint.sh \
|
||||
/bin/gosu
|
||||
|
||||
EXPOSE 9000 8123 9009
|
||||
VOLUME /var/lib/clickhouse
|
||||
|
||||
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
@ -1,86 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e -x
|
||||
|
||||
# Not sure why shellcheck complains that rc is not assigned before it is referenced.
|
||||
# shellcheck disable=SC2154
|
||||
trap 'rc=$?; echo EXITED WITH: $rc; exit $rc' EXIT
|
||||
|
||||
# CLI option to prevent rebuilding images; just re-run tests with the images left over from the previous run
|
||||
readonly NO_REBUILD_FLAG="--no-rebuild"
|
||||
|
||||
readonly CLICKHOUSE_DOCKER_DIR="$(realpath "${1}")"
|
||||
readonly CLICKHOUSE_PACKAGES_ARG="${2}"
|
||||
CLICKHOUSE_SERVER_IMAGE="${3}"
|
||||
|
||||
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
|
||||
readonly CLICKHOUSE_PACKAGES_DIR="$(realpath "${2}")" # or --no-rebuild
|
||||
fi
|
||||
|
||||
|
||||
# In order to allow packages directory to be anywhere, and to reduce amount of context sent to the docker daemon,
|
||||
# all images are built in multiple stages:
|
||||
# 1. build base image, install dependencies
|
||||
# 2. run image with volume mounted, install what needed from those volumes
|
||||
# 3. tag container as image
|
||||
# 4. [optional] build another image atop of tagged.
|
||||
|
||||
# TODO: optionally mount most recent clickhouse-test and queries directory from local machine
|
||||
|
||||
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
|
||||
docker build --network=host \
|
||||
-f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \
|
||||
--target clickhouse-test-runner-base \
|
||||
-t clickhouse-test-runner-base:preinstall \
|
||||
"${CLICKHOUSE_DOCKER_DIR}/test/stateless"
|
||||
|
||||
docker rm -f clickhouse-test-runner-installing-packages || true
|
||||
docker run --network=host \
|
||||
-v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \
|
||||
--name clickhouse-test-runner-installing-packages \
|
||||
clickhouse-test-runner-base:preinstall
|
||||
docker commit clickhouse-test-runner-installing-packages clickhouse-statelest-test-runner:local
|
||||
docker rm -f clickhouse-test-runner-installing-packages || true
|
||||
fi
|
||||
|
||||
# # Create a bind-volume to the clickhouse-test script file
|
||||
# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/clickhouse-test --opt o=bind clickhouse-test-script-volume
|
||||
# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/queries --opt o=bind clickhouse-test-queries-dir-volume
|
||||
|
||||
# Build server image (optional) from local packages
|
||||
if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then
|
||||
CLICKHOUSE_SERVER_IMAGE="clickhouse/server:local"
|
||||
|
||||
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
|
||||
docker build --network=host \
|
||||
-f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \
|
||||
--target clickhouse-server-base \
|
||||
-t clickhouse-server-base:preinstall \
|
||||
"${CLICKHOUSE_DOCKER_DIR}/server"
|
||||
|
||||
docker rm -f clickhouse_server_base_installing_server || true
|
||||
docker run --network=host -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \
|
||||
--name clickhouse_server_base_installing_server \
|
||||
clickhouse-server-base:preinstall
|
||||
docker commit clickhouse_server_base_installing_server clickhouse-server-base:postinstall
|
||||
|
||||
docker build --network=host \
|
||||
-f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \
|
||||
--target clickhouse-server \
|
||||
-t "${CLICKHOUSE_SERVER_IMAGE}" \
|
||||
"${CLICKHOUSE_DOCKER_DIR}/server"
|
||||
fi
|
||||
fi
|
||||
|
||||
docker rm -f test-runner || true
|
||||
docker-compose down
|
||||
CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \
|
||||
docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \
|
||||
create \
|
||||
--build --force-recreate
|
||||
|
||||
CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \
|
||||
docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \
|
||||
run \
|
||||
--name test-runner \
|
||||
test-runner
|
@ -1,34 +0,0 @@
|
||||
version: "2"
|
||||
|
||||
services:
|
||||
clickhouse-server:
|
||||
image: ${CLICKHOUSE_SERVER_IMAGE}
|
||||
expose:
|
||||
- "8123" # HTTP
|
||||
- "9000" # TCP
|
||||
- "9009" # HTTP-interserver
|
||||
restart: "no"
|
||||
|
||||
test-runner:
|
||||
image: clickhouse-statelest-test-runner:local
|
||||
|
||||
restart: "no"
|
||||
depends_on:
|
||||
- clickhouse-server
|
||||
environment:
|
||||
# these are used by clickhouse-test to point clickhouse-client to the right server
|
||||
- CLICKHOUSE_HOST=clickhouse-server
|
||||
- CLICKHOUSE_PORT=9009
|
||||
- CLICKHOUSE_TEST_HOST_EXPOSED_PORT=51234
|
||||
expose:
|
||||
# port for any test to serve data to clickhouse-server on rare occasion (like URL-engine tables in 00646),
|
||||
# should match value of CLICKHOUSE_TEST_HOST_EXPOSED_PORT above
|
||||
- "51234"
|
||||
|
||||
# NOTE: Dev-mode: mount newest versions of the queries and clickhouse-test script into container.
|
||||
# volumes:
|
||||
# - /home/enmk/proj/ClickHouse_master/tests/queries:/usr/share/clickhouse-test/queries:ro
|
||||
# - /home/enmk/proj/ClickHouse_master/tests/clickhouse-test:/usr/bin/clickhouse-test:ro
|
||||
|
||||
# String-form instead of list-form to allow multiple arguments in "${CLICKHOUSE_TEST_ARGS}"
|
||||
entrypoint: "clickhouse-test ${CLICKHOUSE_TEST_ARGS}"
|
@ -36,6 +36,7 @@ Example of configuration:
|
||||
<access_key_id>AKIAIOSFODNN7EXAMPLE</access_key_id>
|
||||
<secret_access_key> wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY</secret_access_key>
|
||||
<format>CSV</format>
|
||||
<url>https://s3.us-east-1.amazonaws.com/yourbucket/mydata/</url>
|
||||
</s3_mydata>
|
||||
</named_collections>
|
||||
</clickhouse>
|
||||
@ -44,12 +45,12 @@ Example of configuration:
|
||||
### Example of using named connections with the s3 function
|
||||
|
||||
```sql
|
||||
INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz',
|
||||
INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz',
|
||||
format = 'TSV', structure = 'number UInt64', compression_method = 'gzip')
|
||||
SELECT * FROM numbers(10000);
|
||||
|
||||
SELECT count()
|
||||
FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz')
|
||||
FROM s3(s3_mydata, filename = 'test_file.tsv.gz')
|
||||
|
||||
┌─count()─┐
│   10000 │
└─────────┘
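The same named-collection query can also be run non-interactively; a hedged example, assuming a local clickhouse-client and that the `s3_mydata` collection from the configuration above is defined on the server:

    # Sketch: run the documented query through clickhouse-client.
    clickhouse-client --query \
        "SELECT count() FROM s3(s3_mydata, filename = 'test_file.tsv.gz')"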
|
||||
|
@ -114,9 +114,9 @@ In addition, this column is not substituted when using an asterisk in a SELECT q
|
||||
|
||||
### EPHEMERAL {#ephemeral}
|
||||
|
||||
`EPHEMERAL expr`
|
||||
`EPHEMERAL [expr]`
|
||||
|
||||
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement.
|
||||
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of the CREATE statement. If `expr` is omitted, the column type must be specified explicitly.
INSERT without a list of columns will skip such a column, so the SELECT/INSERT invariant is preserved: the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
|
||||
|
||||
### ALIAS {#alias}
|
||||
|
@ -110,9 +110,9 @@ SELECT x, toTypeName(x) FROM t1;
|
||||
|
||||
### EPHEMERAL {#ephemeral}
|
||||
|
||||
`EPHEMERAL expr`
|
||||
`EPHEMERAL [expr]`
|
||||
|
||||
Ephemeral expression. Such a column is not stored in the table and cannot be obtained in a SELECT query, but it can be referenced in the default expressions of a CREATE query.
Ephemeral expression. Such a column is not stored in the table and cannot be obtained in a SELECT query, but it can be referenced in the default expressions of a CREATE query. If the default value `expr` is not specified, the column type must be specified.
INSERT without a list of columns ignores this column, so the invariant is preserved: a dump obtained with `SELECT *` can be inserted back into the table with an INSERT without specifying the list of columns.
|
||||
|
||||
### ALIAS {#alias}
|
||||
|
@ -16,7 +16,7 @@ jsmin==3.0.0
|
||||
livereload==2.6.3
|
||||
Markdown==3.3.2
|
||||
MarkupSafe==2.1.0
|
||||
mkdocs==1.1.2
|
||||
mkdocs==1.3.0
|
||||
mkdocs-htmlproofer-plugin==0.0.3
|
||||
mkdocs-macros-plugin==0.4.20
|
||||
nltk==3.7
|
||||
|
@ -20,7 +20,7 @@ ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml
|
||||
EnvironmentFile=-/etc/default/clickhouse
|
||||
LimitCORE=infinity
|
||||
LimitNOFILE=500000
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
|
||||
|
||||
[Install]
|
||||
# ClickHouse should not start from the rescue shell (rescue.target).
|
||||
|
@ -148,13 +148,13 @@
|
||||
<!-- <interserver_https_port>9010</interserver_https_port> -->
|
||||
|
||||
<!-- Hostname that is used by other replicas to request this server.
|
||||
If not specified, than it is determined analogous to 'hostname -f' command.
|
||||
If not specified, then it is determined analogous to 'hostname -f' command.
|
||||
This setting could be used to switch replication to another network interface
|
||||
(the server may be connected to multiple networks via multiple addresses)
|
||||
-->
|
||||
|
||||
<!--
|
||||
<interserver_http_host>example.yandex.ru</interserver_http_host>
|
||||
<interserver_http_host>example.clickhouse.com</interserver_http_host>
|
||||
-->
|
||||
|
||||
<!-- You can specify credentials for authentication between replicas.
|
||||
@ -765,14 +765,14 @@
|
||||
-->
|
||||
<!--<remote_url_allow_hosts>-->
|
||||
<!-- Host should be specified exactly as in URL. The name is checked before DNS resolution.
|
||||
Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
|
||||
Example: "clickhouse.com", "clickhouse.com." and "www.clickhouse.com" are different hosts.
|
||||
If port is explicitly specified in URL, the host:port is checked as a whole.
|
||||
If host specified here without port, any port with this host allowed.
|
||||
"yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
|
||||
"clickhouse.com" -> "clickhouse.com:443", "clickhouse.com:80" etc. is allowed, but "clickhouse.com:80" -> only "clickhouse.com:80" is allowed.
|
||||
If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
|
||||
If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
|
||||
Host should be specified using the host xml tag:
|
||||
<host>yandex.ru</host>
|
||||
<host>clickhouse.com</host>
|
||||
-->
|
||||
|
||||
<!-- Regular expression can be specified. RE2 engine is used for regexps.
|
||||
@ -1030,25 +1030,17 @@
|
||||
<flush_interval_milliseconds>1000</flush_interval_milliseconds>
|
||||
</crash_log>
|
||||
|
||||
<!-- Session log. Stores user log in (successful or not) and log out events. -->
|
||||
<session_log>
|
||||
<!-- Session log. Stores user log in (successful or not) and log out events.
|
||||
|
||||
Note: session log has known security issues and should not be used in production.
|
||||
-->
|
||||
<!-- <session_log>
|
||||
<database>system</database>
|
||||
<table>session_log</table>
|
||||
|
||||
<partition_by>toYYYYMM(event_date)</partition_by>
|
||||
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
|
||||
</session_log>
|
||||
|
||||
<!-- Parameters for embedded dictionaries, used in Yandex.Metrica.
|
||||
See https://clickhouse.com/docs/en/dicts/internal_dicts/
|
||||
-->
|
||||
|
||||
<!-- Path to file with region hierarchy. -->
|
||||
<!-- <path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file> -->
|
||||
|
||||
<!-- Path to directory with files containing names of regions -->
|
||||
<!-- <path_to_regions_names_files>/opt/geo/</path_to_regions_names_files> -->
|
||||
|
||||
</session_log> -->
|
||||
|
||||
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
|
||||
<!-- Custom TLD lists.
|
||||
|
@ -103,7 +103,7 @@ interserver_http_port: 9009
|
||||
# If not specified, then it is determined analogous to 'hostname -f' command.
|
||||
# This setting could be used to switch replication to another network interface
|
||||
# (the server may be connected to multiple networks via multiple addresses)
|
||||
# interserver_http_host: example.yandex.ru
|
||||
# interserver_http_host: example.clickhouse.com
|
||||
|
||||
# You can specify credentials for authentication between replicas.
|
||||
# This is required when interserver_https_port is accessible from untrusted networks,
|
||||
@ -592,10 +592,10 @@ remote_servers:
|
||||
# remote_url_allow_hosts:
|
||||
|
||||
# Host should be specified exactly as in URL. The name is checked before DNS resolution.
|
||||
# Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
|
||||
# Example: "clickhouse.com", "clickhouse.com." and "www.clickhouse.com" are different hosts.
|
||||
# If port is explicitly specified in URL, the host:port is checked as a whole.
|
||||
# If host specified here without port, any port with this host allowed.
|
||||
# "yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
|
||||
# "clickhouse.com" -> "clickhouse.com:443", "clickhouse.com:80" etc. is allowed, but "clickhouse.com:80" -> only "clickhouse.com:80" is allowed.
|
||||
# If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
|
||||
# If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
|
||||
|
||||
@ -803,16 +803,6 @@ crash_log:
|
||||
partition_by: ''
|
||||
flush_interval_milliseconds: 1000
|
||||
|
||||
# Parameters for embedded dictionaries, used in Yandex.Metrica.
|
||||
# See https://clickhouse.com/docs/en/dicts/internal_dicts/
|
||||
|
||||
# Path to file with region hierarchy.
|
||||
# path_to_regions_hierarchy_file: /opt/geo/regions_hierarchy.txt
|
||||
|
||||
# Path to directory with files containing names of regions
|
||||
# path_to_regions_names_files: /opt/geo/
|
||||
|
||||
|
||||
# top_level_domains_path: /var/lib/clickhouse/top_level_domains/
|
||||
# Custom TLD lists.
|
||||
# Format: name: /path/to/file
|
||||
|
@ -266,12 +266,25 @@
|
||||
color: var(--null-color);
|
||||
}
|
||||
|
||||
@keyframes hourglass-animation {
|
||||
0% {
|
||||
transform: rotate(-180deg);
|
||||
}
|
||||
50% {
|
||||
transform: rotate(-180deg);
|
||||
}
|
||||
100% {
|
||||
transform: none;
|
||||
}
|
||||
}
|
||||
|
||||
#hourglass
|
||||
{
|
||||
display: none;
|
||||
padding-left: 1rem;
|
||||
margin-left: 1rem;
|
||||
font-size: 110%;
|
||||
color: #888;
|
||||
animation: hourglass-animation 1s linear infinite;
|
||||
}
|
||||
|
||||
#check-mark
|
||||
@ -457,7 +470,7 @@
|
||||
}
|
||||
|
||||
document.getElementById('check-mark').style.display = 'none';
|
||||
document.getElementById('hourglass').style.display = 'inline';
|
||||
document.getElementById('hourglass').style.display = 'inline-block';
|
||||
|
||||
xhr.send(query);
|
||||
}
|
||||
|
@ -79,9 +79,9 @@
|
||||
Each element of list has one of the following forms:
|
||||
<ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
|
||||
2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
|
||||
<host> Hostname. Example: server01.yandex.ru.
|
||||
<host> Hostname. Example: server01.clickhouse.com.
|
||||
To check access, DNS query is performed, and all received addresses compared to peer address.
|
||||
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$
|
||||
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
|
||||
To check access, DNS PTR query is performed for peer address and then regexp is applied.
|
||||
Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
|
||||
It is strongly recommended that the regexp ends with $
|
||||
|
@ -70,9 +70,9 @@ users:
|
||||
# Each element of list has one of the following forms:
|
||||
# ip: IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
|
||||
# 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
|
||||
# host: Hostname. Example: server01.yandex.ru.
|
||||
# host: Hostname. Example: server01.clickhouse.com.
|
||||
# To check access, DNS query is performed, and all received addresses compared to peer address.
|
||||
# host_regexp: Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$
|
||||
# host_regexp: Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
|
||||
# To check access, DNS PTR query is performed for peer address and then regexp is applied.
|
||||
# Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
|
||||
# It is strongly recommended that the regexp ends with $ and that the whole expression is quoted in ''
|
||||
|
@ -182,6 +182,7 @@ enum class AccessType
|
||||
M(JDBC, "", GLOBAL, SOURCES) \
|
||||
M(HDFS, "", GLOBAL, SOURCES) \
|
||||
M(S3, "", GLOBAL, SOURCES) \
|
||||
M(HIVE, "", GLOBAL, SOURCES) \
|
||||
M(SOURCES, "", GROUP, ALL) \
|
||||
\
|
||||
M(ALL, "ALL PRIVILEGES", GROUP, NONE) /* full access */ \
|
||||
|
@ -107,6 +107,11 @@ const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type)
|
||||
static const auto info = make_info("EXECUTION_TIME", 1000000000 /* execution_time is stored in nanoseconds */);
|
||||
return info;
|
||||
}
|
||||
case QuotaType::WRITTEN_BYTES:
|
||||
{
|
||||
static const auto info = make_info("WRITTEN_BYTES", 1);
|
||||
return info;
|
||||
}
|
||||
case QuotaType::MAX: break;
|
||||
}
|
||||
throw Exception("Unexpected quota type: " + std::to_string(static_cast<int>(type)), ErrorCodes::LOGICAL_ERROR);
|
||||
|
@ -20,6 +20,7 @@ enum class QuotaType
|
||||
READ_ROWS, /// Number of rows read from tables.
|
||||
READ_BYTES, /// Number of bytes read from tables.
|
||||
EXECUTION_TIME, /// Total amount of query execution time in nanoseconds.
|
||||
WRITTEN_BYTES, /// Number of bytes written to tables.
|
||||
|
||||
MAX
|
||||
};
|
||||
|
@ -13,7 +13,7 @@ namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int QUOTA_EXPIRED;
|
||||
extern const int QUOTA_EXCEEDED;
|
||||
}
|
||||
|
||||
|
||||
@ -33,7 +33,7 @@ struct EnabledQuota::Impl
|
||||
"Quota for user " + backQuote(user_name) + " for " + to_string(duration) + " has been exceeded: "
|
||||
+ type_info.valueToStringWithName(used) + "/" + type_info.valueToString(max) + ". "
|
||||
+ "Interval will end at " + to_string(end_of_interval) + ". " + "Name of quota template: " + backQuote(quota_name),
|
||||
ErrorCodes::QUOTA_EXPIRED);
|
||||
ErrorCodes::QUOTA_EXCEEDED);
|
||||
}
|
||||
|
||||
|
||||
|
@ -208,7 +208,7 @@
|
||||
M(198, DNS_ERROR) \
|
||||
M(199, UNKNOWN_QUOTA) \
|
||||
M(200, QUOTA_DOESNT_ALLOW_KEYS) \
|
||||
M(201, QUOTA_EXPIRED) \
|
||||
M(201, QUOTA_EXCEEDED) \
|
||||
M(202, TOO_MANY_SIMULTANEOUS_QUERIES) \
|
||||
M(203, NO_FREE_CONNECTION) \
|
||||
M(204, CANNOT_FSYNC) \
|
||||
|
@ -241,6 +241,10 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t

std::lock_guard cache_lock(mutex);

#ifndef NDEBUG
assertCacheCorrectness(key, cache_lock);
#endif

/// Get all segments which intersect with the given range.
auto file_segments = getImpl(key, range, cache_lock);

@ -315,7 +319,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t

LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
const Key & key, size_t offset, size_t size, FileSegment::State state,
std::lock_guard<std::mutex> & /* cache_lock */)
std::lock_guard<std::mutex> & cache_lock)
{
/// Create a file segment cell and put it in `files` map by [key][offset].

@ -323,8 +327,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
return nullptr; /// Empty files are not cached.

if (files[key].contains(offset))
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache already exists for key: `{}`, offset: {}, size: {}", keyToStr(key), offset, size);
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}",
keyToStr(key), offset, size, dumpStructureImpl(key, cache_lock));

auto file_segment = std::make_shared<FileSegment>(offset, size, key, this, state);
FileSegmentCell cell(std::move(file_segment), queue);
@ -340,8 +346,10 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(

auto [it, inserted] = offsets.insert({offset, std::move(cell)});
if (!inserted)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Failed to insert into cache key: `{}`, offset: {}, size: {}", keyToStr(key), offset, size);
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Failed to insert into cache key: `{}`, offset: {}, size: {}",
keyToStr(key), offset, size);

return &(it->second);
}
@ -523,8 +531,8 @@ void LRUFileCache::loadCacheInfoIntoMemory()
std::lock_guard cache_lock(mutex);

Key key;
UInt64 offset;
size_t size;
UInt64 offset = 0;
size_t size = 0;
std::vector<FileSegmentCell *> cells;

/// cache_base_path / key_prefix / key / offset
@ -687,22 +695,32 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
}
}

String LRUFileCache::dumpStructure(const Key & key_)
String LRUFileCache::dumpStructure(const Key & key)
{
std::lock_guard cache_lock(mutex);
return dumpStructureImpl(key, cache_lock);
}

String LRUFileCache::dumpStructureImpl(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
{
WriteBufferFromOwnString result;
for (auto it = queue.begin(); it != queue.end(); ++it)
{
auto [key, offset] = *it;
if (key == key_)
{
auto * cell = getCell(key, offset, cache_lock);
result << (it != queue.begin() ? ", " : "") << cell->file_segment->range().toString();
result << "(state: " << cell->file_segment->download_state << ")";
}
}
const auto & cells_by_offset = files[key];

for (const auto & [offset, cell] : cells_by_offset)
result << cell.file_segment->getInfoForLog() << "\n";

return result.str();
}

void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & /* cache_lock */)
{
const auto & cells_by_offset = files[key];

for (const auto & [_, cell] : cells_by_offset)
{
const auto & file_segment = cell.file_segment;
file_segment->assertCorrectness();
}
}

}
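dumpStructure()/dumpStructureImpl() follow the pattern used throughout this cache: the public method takes the cache mutex and the private *Impl overload requires a std::lock_guard reference, so it can also be called from code that already holds the lock (here, addCell()). A minimal sketch of the idiom with hypothetical names, not the ClickHouse classes:

#include <mutex>
#include <string>

class CacheModel
{
public:
    std::string dump()                          /// public entry point: acquires the lock itself
    {
        std::lock_guard lock(mutex);
        return dumpImpl(lock);
    }

private:
    /// Taking the guard by reference documents (and enforces at the call site)
    /// that the caller already holds `mutex`.
    std::string dumpImpl(std::lock_guard<std::mutex> &) { return "..."; }

    std::mutex mutex;
};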
@ -25,6 +25,7 @@ namespace DB
class IFileCache : private boost::noncopyable
{
friend class FileSegment;
friend struct FileSegmentsHolder;

public:
using Key = UInt128;
@ -196,6 +197,8 @@ private:
FileSegments splitRangeIntoEmptyCells(
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);

String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);

public:
struct Stat
{
@ -208,6 +211,7 @@ public:
Stat getStat();

String dumpStructure(const Key & key_) override;
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
};

}
@ -159,7 +159,18 @@ void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_)
remote_file_reader = remote_file_reader_;
}

void FileSegment::write(const char * from, size_t size)
void FileSegment::resetRemoteFileReader()
{
if (!isDownloader())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Only downloader can use remote filesystem file reader");

if (!remote_file_reader)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Remote file reader does not exist");

remote_file_reader.reset();
}

void FileSegment::write(const char * from, size_t size, size_t offset_)
{
if (!size)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed");
@ -174,8 +185,24 @@ void FileSegment::write(const char * from, size_t size)
"Only downloader can do the downloading. (CallerId: {}, DownloaderId: {})",
getCallerId(), downloader_id);

if (downloaded_size == range().size())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Attempt to write {} bytes to offset: {}, but current file segment is already fully downloaded",
size, offset_);

auto download_offset = range().left + downloaded_size;
if (offset_ != download_offset)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Attempt to write {} bytes to offset: {}, but current download offset is {}",
size, offset_, download_offset);

if (!cache_writer)
{
if (downloaded_size > 0)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cache writer was finalized (downloaded size: {}, state: {})",
downloaded_size, stateToString(download_state));

auto download_path = cache->getPathInLocalCache(key(), offset());
cache_writer = std::make_unique<WriteBufferFromFile>(download_path);
}
@ -190,19 +217,26 @@ void FileSegment::write(const char * from, size_t size)

downloaded_size += size;
}
catch (...)
catch (Exception & e)
{
std::lock_guard segment_lock(mutex);

LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLogImpl(segment_lock));
auto info = getInfoForLogImpl(segment_lock);
e.addMessage("while writing into cache, info: " + info);

LOG_ERROR(log, "Failed to write to cache. File segment info: {}", info);

download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;

cache_writer->finalize();
cache_writer.reset();

cv.notify_all();

throw;
}

assert(getDownloadOffset() == offset_ + size);
}

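The extra offset_ argument lets write() verify that data arrives strictly in download order: the only legal write position is range().left + downloaded_size. A minimal, self-contained model of that invariant with hypothetical values (not the real FileSegment):

#include <cassert>
#include <cstddef>
#include <stdexcept>

struct SegmentModel
{
    size_t left = 100;            /// hypothetical range().left
    size_t downloaded_size = 0;

    void write(size_t size, size_t offset)
    {
        /// The write must continue exactly where the previous one ended.
        if (offset != left + downloaded_size)
            throw std::runtime_error("write offset does not match current download offset");
        downloaded_size += size;
    }
};

int main()
{
    SegmentModel s;
    s.write(30, 100);             /// ok: first write lands at range().left
    s.write(10, 130);             /// ok: continues at the current download offset
    assert(s.downloaded_size == 40);
    /// s.write(10, 160) would throw: gaps or rewrites are rejected.
}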
FileSegment::State FileSegment::wait()
@ -270,7 +304,6 @@ void FileSegment::setDownloaded(std::lock_guard<std::mutex> & /* segment_lock */
download_state = State::DOWNLOADED;
is_downloaded = true;

assert(cache_writer);
if (cache_writer)
{
cache_writer->finalize();
@ -299,107 +332,125 @@ void FileSegment::completeBatchAndResetDownloader()

void FileSegment::complete(State state)
{
{
std::lock_guard segment_lock(mutex);

bool is_downloader = downloader_id == getCallerId();
if (!is_downloader)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"File segment can be completed only by downloader or downloader's FileSegmentsHodler");
}

if (state != State::DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cannot complete file segment with state: {}", stateToString(state));
}

download_state = state;
}

completeImpl();
cv.notify_all();
}

void FileSegment::complete()
{
{
std::lock_guard segment_lock(mutex);

if (download_state == State::SKIP_CACHE || detached)
return;

if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size())
setDownloaded(segment_lock);

if (download_state == State::DOWNLOADING || download_state == State::EMPTY)
download_state = State::PARTIALLY_DOWNLOADED;
}

completeImpl(true);
cv.notify_all();
}

void FileSegment::completeImpl(bool allow_non_strict_checking)
{
/// cache lock is always taken before segment lock.
std::lock_guard cache_lock(cache->mutex);
std::lock_guard segment_lock(mutex);

bool download_can_continue = false;

if (download_state == State::PARTIALLY_DOWNLOADED
|| download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)
bool is_downloader = downloader_id == getCallerId();
if (!is_downloader)
{
bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
download_can_continue = !is_last_holder && download_state == State::PARTIALLY_DOWNLOADED;
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"File segment can be completed only by downloader or downloader's FileSegmentsHodler");
}

if (!download_can_continue)
if (state != State::DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cannot complete file segment with state: {}", stateToString(state));
}

download_state = state;

try
{
completeImpl(cache_lock, segment_lock);
}
catch (...)
{
if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true))
downloader_id.clear();

cv.notify_all();
throw;
}

cv.notify_all();
}

void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
{
std::lock_guard segment_lock(mutex);

if (download_state == State::SKIP_CACHE || detached)
return;

if (download_state != State::DOWNLOADED && getDownloadedSize(segment_lock) == range().size())
setDownloaded(segment_lock);

if (download_state == State::DOWNLOADING || download_state == State::EMPTY)
{
/// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the
/// downloader or the only owner of the segment.

bool can_update_segment_state = downloader_id == getCallerIdImpl(true)
|| cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);

if (can_update_segment_state)
download_state = State::PARTIALLY_DOWNLOADED;
}

try
{
completeImpl(cache_lock, segment_lock, /* allow_non_strict_checking */true);
}
catch (...)
{
if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true))
downloader_id.clear();

cv.notify_all();
throw;
}

cv.notify_all();
}

void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock, bool allow_non_strict_checking)
{
bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);

if (is_last_holder
&& (download_state == State::PARTIALLY_DOWNLOADED || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION))
{
size_t current_downloaded_size = getDownloadedSize(segment_lock);
if (current_downloaded_size == 0)
{
size_t current_downloaded_size = getDownloadedSize(segment_lock);
if (current_downloaded_size == 0)
{
download_state = State::SKIP_CACHE;
LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString());
cache->remove(key(), offset(), cache_lock, segment_lock);
download_state = State::SKIP_CACHE;
LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString());
cache->remove(key(), offset(), cache_lock, segment_lock);
}
else
{
/**
* Only last holder of current file segment can resize the cell,
* because there is an invariant that file segments returned to users
* in FileSegmentsHolder represent a contiguous range, so we can resize
* it only when nobody needs it.
*/
LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size);
cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock);
}

detached = true;
}
else if (is_last_holder)
{
/**
* Only last holder of current file segment can resize the cell,
* because there is an invariant that file segments returned to users
* in FileSegmentsHolder represent a contiguous range, so we can resize
* it only when nobody needs it.
*/
LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size);
cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock);
detached = true;

detached = true;
}
if (cache_writer)
{
cache_writer->finalize();
cache_writer.reset();
remote_file_reader.reset();
}
}

if (!downloader_id.empty() && downloader_id == getCallerIdImpl(allow_non_strict_checking))
if (!downloader_id.empty() && (downloader_id == getCallerIdImpl(allow_non_strict_checking) || is_last_holder))
{
LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state));
downloader_id.clear();
}

if (!download_can_continue && cache_writer)
{
cache_writer->finalize();
cache_writer.reset();
remote_file_reader.reset();
}

assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0);
assertCorrectnessImpl(segment_lock);
}

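Both complete() overloads rely on a fixed lock order: the cache-wide mutex is always taken before the per-segment mutex, which is why completeImpl() now receives both guards from its callers instead of locking internally. A minimal sketch of the ordering with hypothetical types:

#include <mutex>

struct CacheModel   { std::mutex mutex; };
struct SegmentModel { std::mutex mutex; };

/// Every path that needs both locks acquires them in the same order:
/// cache first, segment second. A consistent order makes deadlock between
/// two threads completing segments of the same cache impossible.
void completeSegment(CacheModel & cache, SegmentModel & segment)
{
    std::lock_guard cache_lock(cache.mutex);     /// 1. cache-wide lock
    std::lock_guard segment_lock(segment.mutex); /// 2. per-segment lock
    /// ... perform completion while holding both locks ...
}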
String FileSegment::getInfoForLog() const
@ -440,6 +491,53 @@ String FileSegment::stateToString(FileSegment::State state)
__builtin_unreachable();
}

void FileSegment::assertCorrectness() const
{
std::lock_guard segment_lock(mutex);
assertCorrectnessImpl(segment_lock);
}

void FileSegment::assertCorrectnessImpl(std::lock_guard<std::mutex> & /* segment_lock */) const
{
assert(downloader_id.empty() == (download_state != FileSegment::State::DOWNLOADING));
assert(!downloader_id.empty() == (download_state == FileSegment::State::DOWNLOADING));
assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0);
}

FileSegmentsHolder::~FileSegmentsHolder()
{
/// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from
/// FileSegmentsHolder right after calling file_segment->complete(), so on destruction here
/// remain only uncompleted file segments.

IFileCache * cache = nullptr;

for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();)
{
auto current_file_segment_it = file_segment_it;
auto & file_segment = *current_file_segment_it;

if (!cache)
cache = file_segment->cache;

try
{
/// File segment pointer must be reset right after calling complete() and
/// under the same mutex, because complete() checks for segment pointers.
std::lock_guard cache_lock(cache->mutex);

file_segment->complete(cache_lock);

file_segment_it = file_segments.erase(current_file_segment_it);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
assert(false);
}
}
}

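The destructor moved out of the header does the same work as before, but now completes and erases each segment under the cache mutex, because complete() may inspect how many holders still reference the segment. A minimal sketch of the erase-while-iterating pattern used here, with hypothetical types:

#include <list>
#include <memory>
#include <mutex>

struct SegmentStub { void complete(std::lock_guard<std::mutex> &) {} };

void releaseAll(std::list<std::shared_ptr<SegmentStub>> & segments, std::mutex & cache_mutex)
{
    for (auto it = segments.begin(); it != segments.end();)
    {
        std::lock_guard cache_lock(cache_mutex);
        (*it)->complete(cache_lock);   /// completion and pointer release happen
        it = segments.erase(it);       /// under the same cache lock
    }
}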
String FileSegmentsHolder::toString()
{
String ranges;

@ -95,12 +95,14 @@ public:

bool reserve(size_t size);

void write(const char * from, size_t size);
void write(const char * from, size_t size, size_t offset_);

RemoteFileReaderPtr getRemoteFileReader();

void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);

void resetRemoteFileReader();

String getOrSetDownloader();

String getDownloader() const;
@ -121,16 +123,32 @@ public:

String getInfoForLog() const;

void assertCorrectness() const;

private:
size_t availableSize() const { return reserved_size - downloaded_size; }
bool lastFileSegmentHolder() const;
void complete();
void completeImpl(bool allow_non_strict_checking = false);
void setDownloaded(std::lock_guard<std::mutex> & segment_lock);
static String getCallerIdImpl(bool allow_non_strict_checking = false);
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);

size_t getDownloadedSize(std::lock_guard<std::mutex> & segment_lock) const;
String getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock) const;
void assertCorrectnessImpl(std::lock_guard<std::mutex> & segment_lock) const;

void setDownloaded(std::lock_guard<std::mutex> & segment_lock);

bool lastFileSegmentHolder() const;

/// complete() without any completion state is called from destructor of
/// FileSegmentsHolder. complete() might check if the caller of the method
/// is the last alive holder of the segment. Therefore, complete() and destruction
/// of the file segment pointer must be done under the same cache mutex.
void complete(std::lock_guard<std::mutex> & cache_lock);

void completeImpl(
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock, bool allow_non_strict_checking = false);

static String getCallerIdImpl(bool allow_non_strict_checking = false);

void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);

const Range segment_range;

@ -169,28 +187,7 @@ struct FileSegmentsHolder : private boost::noncopyable
explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}
FileSegmentsHolder(FileSegmentsHolder && other) : file_segments(std::move(other.file_segments)) {}

~FileSegmentsHolder()
{
/// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from
/// FileSegmentsHolder right after calling file_segment->complete(), so on destruction here
/// remain only uncompleted file segments.

for (auto & segment : file_segments)
{
try
{
segment->complete();
}
catch (...)
{
#ifndef NDEBUG
throw;
#else
tryLogCurrentException(__PRETTY_FUNCTION__);
#endif
}
}
}
~FileSegmentsHolder();

FileSegments file_segments{};

@ -67,7 +67,7 @@ void download(DB::FileSegmentPtr file_segment)
fs::create_directories(subdir);

std::string data(size, '0');
file_segment->write(data.data(), size);
file_segment->write(data.data(), size, file_segment->getDownloadOffset());
}

void prepareAndDownload(DB::FileSegmentPtr file_segment)

@ -63,7 +63,11 @@ void CachedReadBufferFromRemoteFS::initialize(size_t offset, size_t size)

SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getCacheReadBuffer(size_t offset) const
{
return std::make_shared<ReadBufferFromFile>(cache->getPathInLocalCache(cache_key, offset), settings.local_fs_buffer_size);
auto path = cache->getPathInLocalCache(cache_key, offset);
auto buf = std::make_shared<ReadBufferFromFile>(path, settings.local_fs_buffer_size);
if (buf->size() == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {}", path);
return buf;
}

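getCacheReadBuffer() now refuses to hand out a reader over a zero-byte cache file, turning a silent empty read into a LOGICAL_ERROR that names the offending path. The same guard can be expressed generically; a minimal sketch using only standard library calls (a hypothetical helper, not the ClickHouse API):

#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <string>

std::ifstream openNonEmpty(const std::string & path)
{
    /// Reject empty files up front so the error points at the bad cache entry
    /// instead of surfacing later as a mysterious zero-byte read.
    if (std::filesystem::file_size(path) == 0)
        throw std::logic_error("Attempt to read from an empty cache file: " + path);
    return std::ifstream(path, std::ios::binary);
}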
SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_)
@ -96,7 +100,6 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSe
remote_fs_segment_reader = remote_file_reader_creator();
file_segment->setRemoteFileReader(remote_fs_segment_reader);

///TODO: add check for pending data
return remote_fs_segment_reader;
}
case ReadType::REMOTE_FS_READ_BYPASS_CACHE:
@ -119,7 +122,6 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(
{
auto range = file_segment->range();

/// Each wait() call has a timeout of 1 second.
size_t wait_download_max_tries = settings.remote_fs_cache_max_wait_sec;
size_t wait_download_tries = 0;

@ -296,17 +298,21 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
{
case ReadType::CACHED:
{
#ifndef NDEBUG
auto * file_reader = assert_cast<ReadBufferFromFile *>(read_buffer_for_file_segment.get());
size_t file_size = file_reader->size();

if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Unexpected state of cache file. Cache file size: {}, cache file offset: {}, "
"expected file size to be non-zero and file downloaded size to exceed current file read offset (expected: {} > {})",
file_size, range.left, range.left + file_size, file_offset_of_buffer_end);
#endif

size_t seek_offset = file_offset_of_buffer_end - range.left;
read_buffer_for_file_segment->seek(seek_offset, SEEK_SET);

auto * file_reader = assert_cast<ReadBufferFromFile *>(read_buffer_for_file_segment.get());
size_t file_size = file_reader->size();
auto state = file_segment->state();

LOG_TEST(log, "Cache file: {}. Cached seek to: {}, file size: {}, file segment state: {}, download offset: {}",
file_reader->getFileName(), seek_offset, file_size, state, file_segment->getDownloadOffset());

assert(file_size > 0);
break;
}
case ReadType::REMOTE_FS_READ_BYPASS_CACHE:
@ -384,6 +390,7 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId());

assert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment->getDownloadOffset());
size_t current_offset = file_segment->getDownloadOffset();

while (true)
{
@ -423,7 +430,11 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
{
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, implementation_buffer->buffer().size());

file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size);
assert(file_segment->getDownloadOffset() == static_cast<size_t>(implementation_buffer->getPosition()));

file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset);

current_offset += current_predownload_size;

bytes_to_predownload -= current_predownload_size;
implementation_buffer->position() += current_predownload_size;
@ -537,13 +548,15 @@ bool CachedReadBufferFromRemoteFS::nextImpl()
}
catch (Exception & e)
{
e.addMessage("Cache info: {}", getInfoForLog());
e.addMessage("Cache info: {}", nextimpl_step_log_info);
throw;
}
}

bool CachedReadBufferFromRemoteFS::nextImplStep()
{
last_caller_id = FileSegment::getCallerId();

if (IFileCache::shouldBypassCache())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Using cache when not allowed");

@ -554,6 +567,9 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
return false;

SCOPE_EXIT({
/// Save state of current file segment before it is completed.
nextimpl_step_log_info = getInfoForLog();

if (current_file_segment_it == file_segments_holder->file_segments.end())
return;

@ -623,6 +639,18 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()

if (!result)
{
#ifndef NDEBUG
if (auto * cache_file_reader = typeid_cast<ReadBufferFromFile *>(implementation_buffer.get()))
{
auto cache_file_size = cache_file_reader->size();
if (cache_file_size == 0)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Attempt to read from an empty cache file: {} (just before actual read)",
cache_file_size);
}
#endif

result = implementation_buffer->next();
size = implementation_buffer->buffer().size();
}
@ -635,7 +663,12 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()

if (file_segment->reserve(size))
{
file_segment->write(needed_to_predownload ? implementation_buffer->position() : implementation_buffer->buffer().begin(), size);
assert(file_segment->getDownloadOffset() == static_cast<size_t>(implementation_buffer->getPosition()));

file_segment->write(needed_to_predownload ? implementation_buffer->position() : implementation_buffer->buffer().begin(), size, file_offset_of_buffer_end);

assert(file_segment->getDownloadOffset() <= file_segment->range().right + 1);
assert(std::next(current_file_segment_it) == file_segments_holder->file_segments.end() || file_segment->getDownloadOffset() == implementation_buffer->getFileOffsetOfBufferEnd());
}
else
{
@ -665,10 +698,15 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
}
}

/// - If last file segment was read from remote fs, then we read up to segment->range().right, but
/// the requested right boundary could be segment->range().left < requested_right_boundary < segment->range().right.
/// Therefore need to resize to a smaller size. And resize must be done after write into cache.
/// - If last file segment was read from local fs, then we could read more than file_segemnt->range().right, so resize is also needed.
if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end())
{
size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1;
size = std::min(size, remaining_size_to_read);
assert(implementation_buffer->buffer().size() >= nextimpl_working_buffer_offset + size);
implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size);
}

@ -692,9 +730,16 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
read_until_position, first_offset, file_segments_holder->toString());

if (size == 0 && file_offset_of_buffer_end < read_until_position)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Having zero bytes, but range is not finished: file offset: {}, reading until: {}",
file_offset_of_buffer_end, read_until_position);
{
std::optional<size_t> cache_file_size;
if (auto * cache_file_reader = assert_cast<ReadBufferFromFile *>(implementation_buffer.get()))
cache_file_size = cache_file_reader->size();

throw Exception(ErrorCodes::LOGICAL_ERROR,
"Having zero bytes, but range is not finished: file offset: {}, reading until: {}, read type: {}, cache file size: {}",
file_offset_of_buffer_end, read_until_position, toString(read_type), cache_file_size ? std::to_string(*cache_file_size) : "None");
}

return result;
}

@ -757,12 +802,24 @@ std::optional<size_t> CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset()

String CachedReadBufferFromRemoteFS::getInfoForLog()
{
return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, file segment info: {}",
remote_fs_object_path, getHexUIntLowercase(cache_key), file_offset_of_buffer_end,
(implementation_buffer ?
std::to_string(implementation_buffer->getRemainingReadRange().left) + '-' + (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None")
: "None"),
(current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog()));
auto implementation_buffer_read_range_str =
implementation_buffer ?
std::to_string(implementation_buffer->getRemainingReadRange().left)
+ '-'
+ (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None")
: "None";

auto current_file_segment_info = current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog();

return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, "
"read_type: {}, last caller: {}, file segment info: {}",
remote_fs_object_path,
getHexUIntLowercase(cache_key),
file_offset_of_buffer_end,
implementation_buffer_read_range_str,
toString(read_type),
last_caller_id,
current_file_segment_info);
}

}

@ -98,7 +98,10 @@ private:
}
__builtin_unreachable();
}

size_t first_offset = 0;
String nextimpl_step_log_info;
String last_caller_id;
};

}
@ -68,16 +68,28 @@ std::future<IAsynchronousReader::Result> ThreadPoolRemoteFSReader::submit(Reques
auto * remote_fs_fd = assert_cast<RemoteFSFileDescriptor *>(request.descriptor.get());

Stopwatch watch(CLOCK_MONOTONIC);
auto [bytes_read, offset] = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore);

ReadBufferFromRemoteFSGather::ReadResult result;
try
{
result = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore);
}
catch (...)
{
if (running_group)
CurrentThread::detachQuery();
throw;
}

watch.stop();

ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, bytes_read);

if (running_group)
thread_status.detachQuery();
CurrentThread::detachQuery();

return Result{ .size = bytes_read, .offset = offset };
ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, result.offset ? result.size - result.offset : result.size);

return Result{ .size = result.size, .offset = result.offset };
});

auto future = task->get_future();
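Wrapping readInto() in try/catch matters because the task attached itself to the query's thread group earlier in submit(); if the read throws, the thread must still be detached before the exception propagates, otherwise query-scoped thread-local state would leak into the next task executed on the same pool thread. A minimal sketch of the pattern with hypothetical attach/detach callbacks:

#include <functional>

void runAttached(bool attached,
                 const std::function<void ()> & attach,
                 const std::function<void ()> & detach,
                 const std::function<void ()> & work)
{
    if (attached)
        attach();
    try
    {
        work();
    }
    catch (...)
    {
        if (attached)
            detach();   /// always undo the attachment, even when the work fails
        throw;
    }
    if (attached)
        detach();
}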
@ -20,6 +20,7 @@
#include <Common/SipHash.h>
#include <Common/FieldVisitorHash.h>
#include <Access/Common/AccessFlags.h>
#include <Access/EnabledQuota.h>
#include <Formats/FormatFactory.h>
#include <base/logger_useful.h>

@ -197,6 +198,9 @@ void AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
copyData(*read_buf, write_buf);
}

if (auto quota = query_context->getQuota())
quota->used(QuotaType::WRITTEN_BYTES, bytes.size());

auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId());
InsertQuery key{query, settings};

@ -69,6 +69,7 @@ public:
/// All below are parameters related to initial query.

Interface interface = Interface::TCP;
bool is_secure = false;

/// For tcp
String os_user;

@ -1092,6 +1092,17 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression)
if (!res)
{
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this());
if (table_function_ptr->needStructureHint())
{
const auto & insertion_table = getInsertionTable();
if (!insertion_table.empty())
{
const auto & structure_hint
= DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns;
table_function_ptr->setStructureHint(structure_hint);
}
}

res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName());

/// Since ITableFunction::parseArguments() may change table_expression, i.e.:

@ -15,6 +15,7 @@
#include <Common/isLocalAddress.h>
#include <base/types.h>
#include <Storages/MergeTree/ParallelReplicasReadingCoordinator.h>
#include <Storages/ColumnsDescription.h>


#include "config_core.h"

@ -508,7 +508,9 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(

default_expr_list->children.emplace_back(
setAlias(
col_decl.default_expression->clone(),
col_decl.default_specifier == "EPHEMERAL" ? /// can be ASTLiteral::value NULL
std::make_shared<ASTLiteral>(data_type_ptr->getDefault()) :
col_decl.default_expression->clone(),
tmp_column_name));
}
else
@ -536,7 +538,11 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(

if (col_decl.default_expression)
{
ASTPtr default_expr = col_decl.default_expression->clone();
ASTPtr default_expr =
col_decl.default_specifier == "EPHEMERAL" && col_decl.default_expression->as<ASTLiteral>()->value.isNull() ?
std::make_shared<ASTLiteral>(DataTypeFactory::instance().get(col_decl.type)->getDefault()) :
col_decl.default_expression->clone();

if (col_decl.type)
column.type = name_type_it->type;
else
@ -1,6 +1,7 @@
#include <Interpreters/InterpreterInsertQuery.h>

#include <Access/Common/AccessFlags.h>
#include <Access/EnabledQuota.h>
#include <Columns/ColumnNullable.h>
#include <Processors/Transforms/buildPushingToViewsChain.h>
#include <DataTypes/DataTypeNullable.h>
@ -51,6 +52,8 @@ InterpreterInsertQuery::InterpreterInsertQuery(
, async_insert(async_insert_)
{
checkStackSize();
if (auto quota = getContext()->getQuota())
quota->checkExceeded(QuotaType::WRITTEN_BYTES);
}

@ -269,7 +272,7 @@ Chain InterpreterInsertQuery::buildChainImpl(
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0));
}

auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), thread_status);
auto counting = std::make_shared<CountingTransform>(out.getInputHeader(), thread_status, getContext()->getQuota());
counting->setProcessListElement(context_ptr->getProcessListElement());
out.addSource(std::move(counting));

@ -86,6 +86,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"initial_query_start_time", std::make_shared<DataTypeDateTime>()},
{"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"interface", std::make_shared<DataTypeUInt8>()},
{"is_secure", std::make_shared<DataTypeUInt8>()},
{"os_user", std::make_shared<DataTypeString>()},
{"client_hostname", std::make_shared<DataTypeString>()},
{"client_name", std::make_shared<DataTypeString>()},
@ -275,6 +276,7 @@ void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableCo
columns[i++]->insert(client_info.initial_query_start_time_microseconds);

columns[i++]->insert(UInt64(client_info.interface));
columns[i++]->insert(static_cast<UInt64>(client_info.is_secure));

columns[i++]->insert(client_info.os_user);
columns[i++]->insert(client_info.client_hostname);

@ -56,6 +56,7 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes()
{"initial_query_start_time", std::make_shared<DataTypeDateTime>()},
{"initial_query_start_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
{"interface", std::make_shared<DataTypeUInt8>()},
{"is_secure", std::make_shared<DataTypeUInt8>()},
{"os_user", std::make_shared<DataTypeString>()},
{"client_hostname", std::make_shared<DataTypeString>()},
{"client_name", std::make_shared<DataTypeString>()},
@ -243,7 +243,7 @@ void Session::shutdownNamedSessions()
NamedSessionsStorage::instance().shutdown();
}

Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_)
Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure)
: auth_id(UUIDHelpers::generateV4()),
global_context(global_context_),
interface(interface_),
@ -251,6 +251,7 @@ Session::Session(const ContextPtr & global_context_, ClientInfo::Interface inter
{
prepared_client_info.emplace();
prepared_client_info->interface = interface_;
prepared_client_info->is_secure = is_secure;
}

Session::~Session()

@ -32,7 +32,7 @@ public:
/// Stops using named sessions. The method must be called at the server shutdown.
static void shutdownNamedSessions();

Session(const ContextPtr & global_context_, ClientInfo::Interface interface_);
Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure = false);
~Session();

Session(const Session &&) = delete;
@ -1,6 +1,7 @@
#include <Parsers/ASTColumnDeclaration.h>
#include <Common/quoteString.h>
#include <IO/Operators.h>
#include <Parsers/ASTLiteral.h>


namespace DB
@ -71,8 +72,12 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta

if (default_expression)
{
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' ';
default_expression->formatImpl(settings, state, frame);
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "");
if (default_specifier != "EPHEMERAL" || !default_expression->as<ASTLiteral>()->value.isNull())
{
settings.ostr << ' ';
default_expression->formatImpl(settings, state, frame);
}
}

if (comment)
@ -505,32 +505,34 @@ namespace

bool parseExtract(IParser::Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr expr;

IParser::Pos begin = pos;
IntervalKind interval_kind;
if (!parseIntervalKind(pos, expected, interval_kind))
{
ASTPtr expr_list;
if (!ParserExpressionList(false, false).parse(pos, expr_list, expected))
return false;

auto res = std::make_shared<ASTFunction>();
res->name = "extract";
res->arguments = expr_list;
res->children.push_back(res->arguments);
node = std::move(res);
return true;
if (parseIntervalKind(pos, expected, interval_kind))
{
ASTPtr expr;

ParserKeyword s_from("FROM");
ParserExpression elem_parser;

if (s_from.ignore(pos, expected) && elem_parser.parse(pos, expr, expected))
{
node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr);
return true;
}
}

ParserKeyword s_from("FROM");
if (!s_from.ignore(pos, expected))
pos = begin;

ASTPtr expr_list;
if (!ParserExpressionList(false, false).parse(pos, expr_list, expected))
return false;

ParserExpression elem_parser;
if (!elem_parser.parse(pos, expr, expected))
return false;

node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr);
auto res = std::make_shared<ASTFunction>();
res->name = "extract";
res->arguments = expr_list;
res->children.push_back(res->arguments);
node = std::move(res);
return true;
}

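The rewritten parseExtract() first tries the SQL form EXTRACT(unit FROM expr) and, if that grammar does not match, rewinds the position and parses a plain extract(...) function call over an expression list. A minimal sketch of the save-and-rewind idiom it relies on, with hypothetical parser types (not the ClickHouse IParser interface):

/// Generic shape of the fallback used above: remember the start position,
/// try the primary grammar, and only fall back after rewinding.
template <typename Pos, typename Node>
bool parseWithFallback(Pos & pos, Node & node,
                       bool (*primary)(Pos &, Node &),
                       bool (*fallback)(Pos &, Node &))
{
    Pos begin = pos;            /// remember where parsing started
    if (primary(pos, node))
        return true;            /// primary grammar matched; keep the advanced position
    pos = begin;                /// otherwise rewind so the fallback sees the full input
    return fallback(pos, node);
}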
@ -9,6 +9,7 @@
#include <Parsers/CommonParsers.h>
#include <Parsers/ParserDataType.h>
#include <Poco/String.h>
#include <Parsers/ASTLiteral.h>


namespace DB
@ -185,8 +186,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
}

Pos pos_before_specifier = pos;
if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) ||
s_ephemeral.ignore(pos, expected) || s_alias.ignore(pos, expected))
if (s_default.ignore(pos, expected) || s_materialized.ignore(pos, expected) || s_alias.ignore(pos, expected))
{
default_specifier = Poco::toUpper(std::string{pos_before_specifier->begin, pos_before_specifier->end});

@ -194,6 +194,12 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
if (!expr_parser.parse(pos, default_expression, expected))
return false;
}
else if (s_ephemeral.ignore(pos, expected))
{
default_specifier = "EPHEMERAL";
if (!expr_parser.parse(pos, default_expression, expected) && type)
default_expression = std::make_shared<ASTLiteral>(Field());
}

if (require_type && !type && !default_expression)
return false; /// reject column name without type
@ -7,7 +7,7 @@ namespace DB
namespace ErrorCodes
{
extern const int TOO_MANY_ROWS_OR_BYTES;
extern const int QUOTA_EXPIRED;
extern const int QUOTA_EXCEEDED;
extern const int QUERY_WAS_CANCELLED;
}

@ -34,7 +34,7 @@ static bool checkCanAddAdditionalInfoToException(const DB::Exception & exception
{
/// Don't add additional info to limits and quota exceptions, and in case of kill query (to pass tests).
return exception.code() != ErrorCodes::TOO_MANY_ROWS_OR_BYTES
&& exception.code() != ErrorCodes::QUOTA_EXPIRED
&& exception.code() != ErrorCodes::QUOTA_EXCEEDED
&& exception.code() != ErrorCodes::QUERY_WAS_CANCELLED;
}

@ -17,7 +17,7 @@ struct ColumnMapping
OptionalIndexes column_indexes_for_input_fields;

/// The list of column indexes that are not presented in input data.
std::vector<UInt8> not_presented_columns;
std::vector<size_t> not_presented_columns;

/// The list of column names in input data. Needed for better exception messages.
std::vector<String> names_of_columns;

@ -98,8 +98,6 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
/// Skip prefix before names and types.
format_reader->skipPrefixBeforeHeader();

/// This is a bit of abstraction leakage, but we need it in parallel parsing:
/// we check if this InputFormat is working with the "real" beginning of the data.
if (with_names)
{
if (format_settings.with_names_use_header)

@ -18,11 +18,12 @@ namespace DB

void CountingTransform::onConsume(Chunk chunk)
{
if (quota)
quota->used(QuotaType::WRITTEN_BYTES, chunk.bytes());

Progress local_progress{WriteProgress(chunk.getNumRows(), chunk.bytes())};
progress.incrementPiecewiseAtomically(local_progress);

//std::cerr << "============ counting adding progress for " << static_cast<const void *>(thread_status) << ' ' << chunk.getNumRows() << " rows\n";

if (thread_status)
{
thread_status->performance_counters.increment(ProfileEvents::InsertedRows, local_progress.written_rows);
@ -2,6 +2,7 @@

#include <IO/Progress.h>
#include <Processors/Transforms/ExceptionKeepingTransform.h>
#include <Access/EnabledQuota.h>


namespace DB
@ -14,8 +15,12 @@ class ThreadStatus;
class CountingTransform final : public ExceptionKeepingTransform
{
public:
explicit CountingTransform(const Block & header, ThreadStatus * thread_status_ = nullptr)
: ExceptionKeepingTransform(header, header), thread_status(thread_status_) {}
explicit CountingTransform(
const Block & header,
ThreadStatus * thread_status_ = nullptr,
std::shared_ptr<const EnabledQuota> quota_ = nullptr)
: ExceptionKeepingTransform(header, header)
, thread_status(thread_status_), quota(std::move(quota_)) {}

String getName() const override { return "CountingTransform"; }

@ -47,6 +52,9 @@ protected:
ProgressCallback progress_callback;
QueryStatus * process_elem = nullptr;
ThreadStatus * thread_status = nullptr;

/// Quota is used to limit amount of written bytes.
std::shared_ptr<const EnabledQuota> quota;
Chunk cur_chunk;
};

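With the extra constructor argument, the transform that already counts inserted rows and bytes can also charge them against the session quota; passing it as std::shared_ptr<const EnabledQuota> with a nullptr default keeps the quota an optional collaborator, so existing callers keep the old behaviour. A minimal sketch of that shape with stand-in types (not the ClickHouse classes):

#include <cstddef>
#include <memory>
#include <utility>

struct QuotaStub { void used(size_t /* bytes */) const {} };   /// stand-in for EnabledQuota

class CountingStub
{
public:
    /// Default-constructed callers get counting only; callers that pass a quota also get accounting.
    explicit CountingStub(std::shared_ptr<const QuotaStub> quota_ = nullptr) : quota(std::move(quota_)) {}

    void onChunk(size_t bytes)
    {
        if (quota)
            quota->used(bytes);   /// charge written bytes only when a quota is attached
        total += bytes;
    }

private:
    std::shared_ptr<const QuotaStub> quota;
    size_t total = 0;
};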
@ -759,44 +759,6 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter)
header.clear();
}

void Pipe::setOutputFormat(ProcessorPtr output)
{
if (output_ports.empty())
throw Exception("Cannot set output format to empty Pipe.", ErrorCodes::LOGICAL_ERROR);

if (output_ports.size() != 1)
throw Exception("Cannot set output format to Pipe because single output port is expected, "
"but it has " + std::to_string(output_ports.size()) + " ports", ErrorCodes::LOGICAL_ERROR);

auto * format = dynamic_cast<IOutputFormat * >(output.get());

if (!format)
throw Exception("IOutputFormat processor expected for QueryPipelineBuilder::setOutputFormat.",
ErrorCodes::LOGICAL_ERROR);

auto & main = format->getPort(IOutputFormat::PortKind::Main);
auto & totals = format->getPort(IOutputFormat::PortKind::Totals);
auto & extremes = format->getPort(IOutputFormat::PortKind::Extremes);

if (!totals_port)
addTotalsSource(std::make_shared<NullSource>(totals.getHeader()));

if (!extremes_port)
addExtremesSource(std::make_shared<NullSource>(extremes.getHeader()));

if (collected_processors)
collected_processors->emplace_back(output);

processors.emplace_back(std::move(output));

connect(*output_ports.front(), main);
connect(*totals_port, totals);
connect(*extremes_port, extremes);

output_ports.clear();
header.clear();
}

void Pipe::transform(const Transformer & transformer)
{
if (output_ports.empty())

@ -141,7 +141,6 @@ private:
bool isCompleted() const { return !empty() && output_ports.empty(); }
static Pipe unitePipes(Pipes pipes, Processors * collected_processors, bool allow_empty_header);
void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter);
void setOutputFormat(ProcessorPtr output);

friend class QueryPipelineBuilder;
friend class QueryPipeline;
@ -8,7 +8,6 @@
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
#include <Processors/Transforms/JoiningTransform.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Transforms/PartialSortingTransform.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
@ -247,21 +246,6 @@ void QueryPipelineBuilder::addExtremesTransform()
pipe.addTransform(std::move(transform), nullptr, port);
}

void QueryPipelineBuilder::setOutputFormat(ProcessorPtr output)
{
checkInitializedAndNotCompleted();

if (output_format)
throw Exception("QueryPipeline already has output.", ErrorCodes::LOGICAL_ERROR);

resize(1);

output_format = dynamic_cast<IOutputFormat * >(output.get());
pipe.setOutputFormat(std::move(output));

initRowsBeforeLimit();
}

QueryPipelineBuilder QueryPipelineBuilder::unitePipelines(
std::vector<std::unique_ptr<QueryPipelineBuilder>> pipelines,
size_t max_threads_limit,
@ -461,93 +445,6 @@ void QueryPipelineBuilder::setProcessListElement(QueryStatus * elem)
}
}

void QueryPipelineBuilder::initRowsBeforeLimit()
{
RowsBeforeLimitCounterPtr rows_before_limit_at_least;

/// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor.
std::vector<LimitTransform *> limits;
std::vector<RemoteSource *> remote_sources;

std::unordered_set<IProcessor *> visited;

struct QueuedEntry
{
IProcessor * processor;
bool visited_limit;
};

std::queue<QueuedEntry> queue;

queue.push({ output_format, false });
visited.emplace(output_format);

while (!queue.empty())
{
auto * processor = queue.front().processor;
auto visited_limit = queue.front().visited_limit;
queue.pop();

if (!visited_limit)
{
if (auto * limit = typeid_cast<LimitTransform *>(processor))
{
visited_limit = true;
limits.emplace_back(limit);
}

if (auto * source = typeid_cast<RemoteSource *>(processor))
remote_sources.emplace_back(source);
}
else if (auto * sorting = typeid_cast<PartialSortingTransform *>(processor))
{
if (!rows_before_limit_at_least)
rows_before_limit_at_least = std::make_shared<RowsBeforeLimitCounter>();

sorting->setRowsBeforeLimitCounter(rows_before_limit_at_least);

/// Don't go to children. Take rows_before_limit from last PartialSortingTransform.
continue;
}

/// Skip totals and extremes port for output format.
if (auto * format = dynamic_cast<IOutputFormat *>(processor))
{
auto * child_processor = &format->getPort(IOutputFormat::PortKind::Main).getOutputPort().getProcessor();
if (visited.emplace(child_processor).second)
queue.push({ child_processor, visited_limit });

continue;
}

for (auto & child_port : processor->getInputs())
{
auto * child_processor = &child_port.getOutputPort().getProcessor();
if (visited.emplace(child_processor).second)
queue.push({ child_processor, visited_limit });
}
}

if (!rows_before_limit_at_least && (!limits.empty() || !remote_sources.empty()))
{
rows_before_limit_at_least = std::make_shared<RowsBeforeLimitCounter>();

for (auto & limit : limits)
limit->setRowsBeforeLimitCounter(rows_before_limit_at_least);

for (auto & source : remote_sources)
source->setRowsBeforeLimitCounter(rows_before_limit_at_least);
}

/// If there is a limit, then enable rows_before_limit_at_least
/// It is needed when zero rows is read, but we still want rows_before_limit_at_least in result.
if (!limits.empty())
rows_before_limit_at_least->add(0);

if (rows_before_limit_at_least)
output_format->setRowsBeforeLimitCounter(rows_before_limit_at_least);
}

PipelineExecutorPtr QueryPipelineBuilder::execute()
{
if (!isCompleted())

@ -10,8 +10,6 @@
namespace DB
{

class IOutputFormat;

class QueryPipelineProcessorsCollector;

struct AggregatingTransformParams;
@ -71,10 +69,6 @@ public:
void addTotalsHavingTransform(ProcessorPtr transform);
/// Add transform which calculates extremes. This transform adds extremes port and doesn't change inputs number.
void addExtremesTransform();
/// Resize pipeline to single output and add IOutputFormat. Pipeline will be completed after this transformation.
void setOutputFormat(ProcessorPtr output);
/// Get current OutputFormat.
IOutputFormat * getOutputFormat() const { return output_format; }
/// Sink is a processor with single input port and no output ports. Creates sink for each output port.
/// Pipeline will be completed after this transformation.
void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter);
@ -163,7 +157,6 @@ public:
private:

Pipe pipe;
IOutputFormat * output_format = nullptr;

/// Limit on the number of threads. Zero means no limit.
/// Sometimes, more streams are created then the number of threads for more optimal execution.
@ -174,8 +167,6 @@ private:
void checkInitialized();
void checkInitializedAndNotCompleted();

void initRowsBeforeLimit();

void setCollectedProcessors(Processors * processors);

friend class QueryPipelineProcessorsCollector;
@ -922,7 +922,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
setThreadName("HTTPHandler");
ThreadStatus thread_status;

session = std::make_unique<Session>(server.context(), ClientInfo::Interface::HTTP);
session = std::make_unique<Session>(server.context(), ClientInfo::Interface::HTTP, request.isSecure());
SCOPE_EXIT({ session.reset(); });
std::optional<CurrentThread::QueryScope> query_scope;


@ -110,7 +110,7 @@ void TCPHandler::runImpl()
setThreadName("TCPHandler");
ThreadStatus thread_status;

session = std::make_unique<Session>(server.context(), ClientInfo::Interface::TCP);
session = std::make_unique<Session>(server.context(), ClientInfo::Interface::TCP, socket().secure());
extractConnectionSettingsFromContext(server.context());

socket().setReceiveTimeout(receive_timeout);
@ -25,6 +25,8 @@ namespace ErrorCodes
const String HDFSBuilderWrapper::CONFIG_PREFIX = "hdfs";
const String HDFS_URL_REGEXP = "^hdfs://[^/]*/.*";

std::once_flag init_libhdfs3_conf_flag;

void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config,
const String & prefix, bool isUser)
{
@ -123,19 +125,22 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A
throw Exception("Illegal HDFS URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);

// Shall set env LIBHDFS3_CONF *before* HDFSBuilderWrapper construction.
String libhdfs3_conf = config.getString(HDFSBuilderWrapper::CONFIG_PREFIX + ".libhdfs3_conf", "");
if (!libhdfs3_conf.empty())
std::call_once(init_libhdfs3_conf_flag, [&config]()
{
if (std::filesystem::path{libhdfs3_conf}.is_relative() && !std::filesystem::exists(libhdfs3_conf))
String libhdfs3_conf = config.getString(HDFSBuilderWrapper::CONFIG_PREFIX + ".libhdfs3_conf", "");
if (!libhdfs3_conf.empty())
{
const String config_path = config.getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.remove_filename();
if (std::filesystem::exists(config_dir / libhdfs3_conf))
libhdfs3_conf = std::filesystem::absolute(config_dir / libhdfs3_conf);
if (std::filesystem::path{libhdfs3_conf}.is_relative() && !std::filesystem::exists(libhdfs3_conf))
{
const String config_path = config.getString("config-file", "config.xml");
const auto config_dir = std::filesystem::path{config_path}.remove_filename();
if (std::filesystem::exists(config_dir / libhdfs3_conf))
libhdfs3_conf = std::filesystem::absolute(config_dir / libhdfs3_conf);
}
setenv("LIBHDFS3_CONF", libhdfs3_conf.c_str(), 1);
}
});

setenv("LIBHDFS3_CONF", libhdfs3_conf.c_str(), 1);
}
HDFSBuilderWrapper builder;
if (builder.get() == nullptr)
throw Exception("Unable to create builder to connect to HDFS: " +
@ -22,8 +22,6 @@ ReadBufferFromHDFS::~ReadBufferFromHDFS() = default;
|
||||
|
||||
struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<SeekableReadBuffer>
|
||||
{
|
||||
/// HDFS create/open functions are not thread safe
|
||||
static std::mutex hdfs_init_mutex;
|
||||
|
||||
String hdfs_uri;
|
||||
String hdfs_file_path;
|
||||
@ -46,8 +44,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
|
||||
, builder(createHDFSBuilder(hdfs_uri_, config_))
|
||||
, read_until_position(read_until_position_)
|
||||
{
|
||||
std::lock_guard lock(hdfs_init_mutex);
|
||||
|
||||
fs = createHDFSFS(builder.get());
|
||||
fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0);
|
||||
|
||||
@ -59,7 +55,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
|
||||
|
||||
~ReadBufferFromHDFSImpl() override
|
||||
{
|
||||
std::lock_guard lock(hdfs_init_mutex);
|
||||
hdfsCloseFile(fs.get(), fin);
|
||||
}
|
||||
|
||||
@ -124,9 +119,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
std::mutex ReadBufferFromHDFS::ReadBufferFromHDFSImpl::hdfs_init_mutex;
|
||||
|
||||
ReadBufferFromHDFS::ReadBufferFromHDFS(
|
||||
const String & hdfs_uri_,
|
||||
const String & hdfs_file_path_,
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <Storages/HDFS/ReadBufferFromHDFS.h>
|
||||
#include <Storages/HDFS/WriteBufferFromHDFS.h>
|
||||
#include <Storages/PartitionedSink.h>
|
||||
#include <Storages/getVirtualsForStorage.h>
|
||||
|
||||
#include <Formats/ReadSchemaUtils.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
@ -164,6 +165,13 @@ StorageHDFS::StorageHDFS(
|
||||
storage_metadata.setConstraints(constraints_);
|
||||
storage_metadata.setComment(comment);
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
|
||||
auto default_virtuals = NamesAndTypesList{
|
||||
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
|
||||
|
||||
auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList();
|
||||
virtual_columns = getVirtualsForStorage(columns, default_virtuals);
|
||||
}
|
||||
|
||||
ColumnsDescription StorageHDFS::getTableStructureFromData(
|
||||
@ -273,36 +281,6 @@ private:
|
||||
Strings::iterator uris_iter;
|
||||
};
|
||||
|
||||
Block HDFSSource::getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column)
|
||||
{
|
||||
auto header = metadata_snapshot->getSampleBlock();
|
||||
/// Note: AddingDefaultsBlockInputStream doesn't change header.
|
||||
if (need_path_column)
|
||||
header.insert(
|
||||
{DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
|
||||
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
|
||||
"_path"});
|
||||
if (need_file_column)
|
||||
header.insert(
|
||||
{DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
|
||||
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
|
||||
"_file"});
|
||||
return header;
|
||||
}
|
||||
|
||||
Block HDFSSource::getBlockForSource(
|
||||
const StorageHDFSPtr & storage,
|
||||
const StorageSnapshotPtr & storage_snapshot,
|
||||
const ColumnsDescription & columns_description,
|
||||
bool need_path_column,
|
||||
bool need_file_column)
|
||||
{
|
||||
if (storage->isColumnOriented())
|
||||
return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
|
||||
else
|
||||
return getHeader(storage_snapshot->metadata, need_path_column, need_file_column);
|
||||
}
|
||||
|
||||
HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri)
|
||||
: pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(context_, uri)) {}
|
||||
|
||||
@ -321,22 +299,28 @@ String HDFSSource::URISIterator::next()
|
||||
return pimpl->next();
|
||||
}
|
||||
|
||||
Block HDFSSource::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
|
||||
{
|
||||
for (const auto & virtual_column : requested_virtual_columns)
|
||||
sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name});
|
||||
|
||||
return sample_block;
|
||||
}
|
||||
|
||||
HDFSSource::HDFSSource(
|
||||
StorageHDFSPtr storage_,
|
||||
const StorageSnapshotPtr & storage_snapshot_,
|
||||
const Block & block_for_format_,
|
||||
const std::vector<NameAndTypePair> & requested_virtual_columns_,
|
||||
ContextPtr context_,
|
||||
UInt64 max_block_size_,
|
||||
bool need_path_column_,
|
||||
bool need_file_column_,
|
||||
std::shared_ptr<IteratorWrapper> file_iterator_,
|
||||
ColumnsDescription columns_description_)
|
||||
: SourceWithProgress(getBlockForSource(storage_, storage_snapshot_, columns_description_, need_path_column_, need_file_column_))
|
||||
: SourceWithProgress(getHeader(block_for_format_, requested_virtual_columns_))
|
||||
, WithContext(context_)
|
||||
, storage(std::move(storage_))
|
||||
, storage_snapshot(storage_snapshot_)
|
||||
, block_for_format(block_for_format_)
|
||||
, requested_virtual_columns(requested_virtual_columns_)
|
||||
, max_block_size(max_block_size_)
|
||||
, need_path_column(need_path_column_)
|
||||
, need_file_column(need_file_column_)
|
||||
, file_iterator(file_iterator_)
|
||||
, columns_description(std::move(columns_description_))
|
||||
{
|
||||
@ -361,14 +345,7 @@ bool HDFSSource::initialize()
|
||||
auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method);
|
||||
read_buf = wrapReadBufferWithCompressionMethod(std::make_unique<ReadBufferFromHDFS>(uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef()), compression);
|
||||
|
||||
auto get_block_for_format = [&]() -> Block
|
||||
{
|
||||
if (storage->isColumnOriented())
|
||||
return storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
|
||||
return storage_snapshot->metadata->getSampleBlock();
|
||||
};
|
||||
|
||||
auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, get_block_for_format(), max_block_size);
|
||||
auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size);
|
||||
|
||||
QueryPipelineBuilder builder;
|
||||
builder.init(Pipe(input_format));
|
||||
@ -402,20 +379,21 @@ Chunk HDFSSource::generate()
|
||||
Columns columns = chunk.getColumns();
|
||||
UInt64 num_rows = chunk.getNumRows();
|
||||
|
||||
/// Enrich with virtual columns.
|
||||
if (need_path_column)
|
||||
for (const auto & virtual_column : requested_virtual_columns)
|
||||
{
|
||||
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, current_path);
|
||||
columns.push_back(column->convertToFullColumnIfConst());
|
||||
}
|
||||
if (virtual_column.name == "_path")
|
||||
{
|
||||
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, current_path);
|
||||
columns.push_back(column->convertToFullColumnIfConst());
|
||||
}
|
||||
else if (virtual_column.name == "_file")
|
||||
{
|
||||
size_t last_slash_pos = current_path.find_last_of('/');
|
||||
auto file_name = current_path.substr(last_slash_pos + 1);
|
||||
|
||||
if (need_file_column)
|
||||
{
|
||||
size_t last_slash_pos = current_path.find_last_of('/');
|
||||
auto file_name = current_path.substr(last_slash_pos + 1);
|
||||
|
||||
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, std::move(file_name));
|
||||
columns.push_back(column->convertToFullColumnIfConst());
|
||||
auto column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumnConst(num_rows, std::move(file_name));
|
||||
columns.push_back(column->convertToFullColumnIfConst());
|
||||
}
|
||||
}
|
||||
|
||||
return Chunk(std::move(columns), num_rows);
|
||||
@ -526,17 +504,6 @@ Pipe StorageHDFS::read(
|
||||
size_t max_block_size,
|
||||
unsigned num_streams)
|
||||
{
|
||||
bool need_path_column = false;
|
||||
bool need_file_column = false;
|
||||
|
||||
for (const auto & column : column_names)
|
||||
{
|
||||
if (column == "_path")
|
||||
need_path_column = true;
|
||||
if (column == "_file")
|
||||
need_file_column = true;
|
||||
}
|
||||
|
||||
std::shared_ptr<HDFSSource::IteratorWrapper> iterator_wrapper{nullptr};
|
||||
if (distributed_processing)
|
||||
{
|
||||
@ -563,27 +530,51 @@ Pipe StorageHDFS::read(
|
||||
});
|
||||
}
|
||||
|
||||
std::unordered_set<String> column_names_set(column_names.begin(), column_names.end());
|
||||
std::vector<NameAndTypePair> requested_virtual_columns;
|
||||
|
||||
for (const auto & virtual_column : getVirtuals())
|
||||
{
|
||||
if (column_names_set.contains(virtual_column.name))
|
||||
requested_virtual_columns.push_back(virtual_column);
|
||||
}
|
||||
|
||||
    ColumnsDescription columns_description;
    Block block_for_format;
    if (isColumnOriented())
    {
        auto fetch_columns = column_names;
        const auto & virtuals = getVirtuals();
        std::erase_if(
            fetch_columns,
            [&](const String & col)
            { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); });

        if (fetch_columns.empty())
            fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));

        columns_description = ColumnsDescription{
            storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()};
        block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
    }
    else
    {
        columns_description = storage_snapshot->metadata->getColumns();
        block_for_format = storage_snapshot->metadata->getSampleBlock();
    }
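For column-oriented formats the branch above trims the requested virtual columns out of the read list and, when nothing physical remains (e.g. a query selecting only _path), falls back to the cheapest physical column so the input format still has something to read. A rough Python sketch of that selection rule (helper names are illustrative, not the ClickHouse API):

def columns_to_fetch(column_names, virtual_names, all_physical_columns):
    # sketch only, not ClickHouse code
    fetch = [c for c in column_names if c not in virtual_names]
    if not fetch:
        # reading only _path/_file still needs at least one real column
        fetch = [smallest_column(all_physical_columns)]  # hypothetical helper
    return fetch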
|
||||
|
||||
Pipes pipes;
|
||||
auto this_ptr = std::static_pointer_cast<StorageHDFS>(shared_from_this());
|
||||
for (size_t i = 0; i < num_streams; ++i)
|
||||
{
|
||||
const auto get_columns_for_format = [&]() -> ColumnsDescription
|
||||
{
|
||||
if (isColumnOriented())
|
||||
return ColumnsDescription{storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()};
|
||||
else
|
||||
return storage_snapshot->metadata->getColumns();
|
||||
};
|
||||
|
||||
pipes.emplace_back(std::make_shared<HDFSSource>(
|
||||
this_ptr,
|
||||
storage_snapshot,
|
||||
block_for_format,
|
||||
requested_virtual_columns,
|
||||
context_,
|
||||
max_block_size,
|
||||
need_path_column,
|
||||
need_file_column,
|
||||
iterator_wrapper,
|
||||
get_columns_for_format()));
|
||||
columns_description));
|
||||
}
|
||||
return Pipe::unitePipes(std::move(pipes));
|
||||
}
|
||||
@ -715,9 +706,7 @@ void registerStorageHDFS(StorageFactory & factory)
|
||||
|
||||
NamesAndTypesList StorageHDFS::getVirtuals() const
|
||||
{
|
||||
return NamesAndTypesList{
|
||||
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
|
||||
return virtual_columns;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -76,6 +76,7 @@ private:
|
||||
const bool distributed_processing;
|
||||
ASTPtr partition_by;
|
||||
bool is_path_with_globs;
|
||||
NamesAndTypesList virtual_columns;
|
||||
|
||||
Poco::Logger * log = &Poco::Logger::get("StorageHDFS");
|
||||
};
|
||||
@ -110,25 +111,14 @@ public:
|
||||
using IteratorWrapper = std::function<String()>;
|
||||
using StorageHDFSPtr = std::shared_ptr<StorageHDFS>;
|
||||
|
||||
static Block getHeader(
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
bool need_path_column,
|
||||
bool need_file_column);
|
||||
|
||||
static Block getBlockForSource(
|
||||
const StorageHDFSPtr & storage,
|
||||
const StorageSnapshotPtr & storage_snapshot_,
|
||||
const ColumnsDescription & columns_description,
|
||||
bool need_path_column,
|
||||
bool need_file_column);
|
||||
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
|
||||
|
||||
HDFSSource(
|
||||
StorageHDFSPtr storage_,
|
||||
const StorageSnapshotPtr & storage_snapshot_,
|
||||
const Block & block_for_format_,
|
||||
const std::vector<NameAndTypePair> & requested_virtual_columns_,
|
||||
ContextPtr context_,
|
||||
UInt64 max_block_size_,
|
||||
bool need_path_column_,
|
||||
bool need_file_column_,
|
||||
std::shared_ptr<IteratorWrapper> file_iterator_,
|
||||
ColumnsDescription columns_description_);
|
||||
|
||||
@ -140,7 +130,8 @@ public:
|
||||
|
||||
private:
|
||||
StorageHDFSPtr storage;
|
||||
StorageSnapshotPtr storage_snapshot;
|
||||
Block block_for_format;
|
||||
std::vector<NameAndTypePair> requested_virtual_columns;
|
||||
UInt64 max_block_size;
|
||||
bool need_path_column;
|
||||
bool need_file_column;
|
||||
|
@ -742,6 +742,7 @@ void registerStorageHive(StorageFactory & factory)
|
||||
StorageFactory::StorageFeatures{
|
||||
.supports_settings = true,
|
||||
.supports_sort_order = true,
|
||||
.source_access_type = AccessType::HIVE,
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -635,24 +635,35 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
|
||||
/// Motivation: memory for index is shared between queries - not belong to the query itself.
|
||||
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker(VariableContext::Global);
|
||||
|
||||
loadUUID();
|
||||
loadColumns(require_columns_checksums);
|
||||
loadChecksums(require_columns_checksums);
|
||||
loadIndexGranularity();
|
||||
calculateColumnsAndSecondaryIndicesSizesOnDisk();
|
||||
loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity`
|
||||
loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`.
|
||||
loadPartitionAndMinMaxIndex();
|
||||
if (!parent_part)
|
||||
try
|
||||
{
|
||||
loadTTLInfos();
|
||||
loadProjections(require_columns_checksums, check_consistency);
|
||||
loadUUID();
|
||||
loadColumns(require_columns_checksums);
|
||||
loadChecksums(require_columns_checksums);
|
||||
loadIndexGranularity();
|
||||
calculateColumnsAndSecondaryIndicesSizesOnDisk();
|
||||
loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity`
|
||||
loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`.
|
||||
loadPartitionAndMinMaxIndex();
|
||||
if (!parent_part)
|
||||
{
|
||||
loadTTLInfos();
|
||||
loadProjections(require_columns_checksums, check_consistency);
|
||||
}
|
||||
|
||||
if (check_consistency)
|
||||
checkConsistency(require_columns_checksums);
|
||||
|
||||
loadDefaultCompressionCodec();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// There could be conditions that data part to be loaded is broken, but some of meta infos are already written
|
||||
// into meta data before exception, need to clean them all.
|
||||
metadata_manager->deleteAll(/*include_projection*/ true);
|
||||
metadata_manager->assertAllDeleted(/*include_projection*/ true);
|
||||
throw;
|
||||
}
|
||||
|
||||
if (check_consistency)
|
||||
checkConsistency(require_columns_checksums);
|
||||
|
||||
loadDefaultCompressionCodec();
|
||||
}
|
||||
|
||||
void IMergeTreeDataPart::appendFilesOfColumnsChecksumsIndexes(Strings & files, bool include_projection) const
|
||||
|
@ -1314,9 +1314,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
if (!parts_from_wal.empty())
|
||||
loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal, part_lock);
|
||||
|
||||
for (auto & part : duplicate_parts_to_remove)
|
||||
part->remove();
|
||||
|
||||
for (auto & part : broken_parts_to_detach)
|
||||
part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes
|
||||
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <Storages/StorageS3Settings.h>
|
||||
#include <Storages/StorageSnapshot.h>
|
||||
#include <Storages/PartitionedSink.h>
|
||||
#include <Storages/getVirtualsForStorage.h>
|
||||
|
||||
#include <IO/ReadBufferFromS3.h>
|
||||
#include <IO/WriteBufferFromS3.h>
|
||||
@ -210,25 +211,16 @@ String StorageS3Source::KeysIterator::next()
|
||||
return pimpl->next();
|
||||
}
|
||||
|
||||
Block StorageS3Source::getHeader(Block sample_block, bool with_path_column, bool with_file_column)
|
||||
Block StorageS3Source::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
|
||||
{
|
||||
if (with_path_column)
|
||||
sample_block.insert(
|
||||
{DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
|
||||
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
|
||||
"_path"});
|
||||
if (with_file_column)
|
||||
sample_block.insert(
|
||||
{DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
|
||||
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
|
||||
"_file"});
|
||||
for (const auto & virtual_column : requested_virtual_columns)
|
||||
sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name});
|
||||
|
||||
return sample_block;
|
||||
}
|
||||
|
||||
StorageS3Source::StorageS3Source(
|
||||
bool need_path,
|
||||
bool need_file,
|
||||
const std::vector<NameAndTypePair> & requested_virtual_columns_,
|
||||
const String & format_,
|
||||
String name_,
|
||||
const Block & sample_block_,
|
||||
@ -242,7 +234,7 @@ StorageS3Source::StorageS3Source(
|
||||
const String & bucket_,
|
||||
std::shared_ptr<IteratorWrapper> file_iterator_,
|
||||
const size_t download_thread_num_)
|
||||
: SourceWithProgress(getHeader(sample_block_, need_path, need_file))
|
||||
: SourceWithProgress(getHeader(sample_block_, requested_virtual_columns_))
|
||||
, WithContext(context_)
|
||||
, name(std::move(name_))
|
||||
, bucket(bucket_)
|
||||
@ -254,8 +246,7 @@ StorageS3Source::StorageS3Source(
|
||||
, client(client_)
|
||||
, sample_block(sample_block_)
|
||||
, format_settings(format_settings_)
|
||||
, with_file_column(need_file)
|
||||
, with_path_column(need_path)
|
||||
, requested_virtual_columns(requested_virtual_columns_)
|
||||
, file_iterator(file_iterator_)
|
||||
, download_thread_num(download_thread_num_)
|
||||
{
|
||||
@ -344,16 +335,18 @@ Chunk StorageS3Source::generate()
|
||||
{
|
||||
UInt64 num_rows = chunk.getNumRows();
|
||||
|
||||
if (with_path_column)
|
||||
chunk.addColumn(DataTypeLowCardinality{std::make_shared<DataTypeString>()}
|
||||
.createColumnConst(num_rows, file_path)
|
||||
->convertToFullColumnIfConst());
|
||||
if (with_file_column)
|
||||
for (const auto & virtual_column : requested_virtual_columns)
|
||||
{
|
||||
size_t last_slash_pos = file_path.find_last_of('/');
|
||||
chunk.addColumn(DataTypeLowCardinality{std::make_shared<DataTypeString>()}
|
||||
.createColumnConst(num_rows, file_path.substr(last_slash_pos + 1))
|
||||
->convertToFullColumnIfConst());
|
||||
if (virtual_column.name == "_path")
|
||||
{
|
||||
chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst());
|
||||
}
|
||||
else if (virtual_column.name == "_file")
|
||||
{
|
||||
size_t last_slash_pos = file_path.find_last_of('/');
|
||||
auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1));
|
||||
chunk.addColumn(column->convertToFullColumnIfConst());
|
||||
}
|
||||
}
|
||||
|
||||
return chunk;
|
||||
@ -627,6 +620,13 @@ StorageS3::StorageS3(
|
||||
storage_metadata.setConstraints(constraints_);
|
||||
storage_metadata.setComment(comment);
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
|
||||
auto default_virtuals = NamesAndTypesList{
|
||||
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
|
||||
|
||||
auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList();
|
||||
virtual_columns = getVirtualsForStorage(columns, default_virtuals);
|
||||
}
|
||||
|
||||
std::shared_ptr<StorageS3Source::IteratorWrapper> StorageS3::createFileIterator(const ClientAuthentication & client_auth, const std::vector<String> & keys, bool is_key_with_globs, bool distributed_processing, ContextPtr local_context)
|
||||
@ -674,14 +674,14 @@ Pipe StorageS3::read(
|
||||
updateClientAndAuthSettings(local_context, client_auth);
|
||||
|
||||
Pipes pipes;
|
||||
bool need_path_column = false;
|
||||
bool need_file_column = false;
|
||||
for (const auto & column : column_names)
|
||||
|
||||
std::unordered_set<String> column_names_set(column_names.begin(), column_names.end());
|
||||
std::vector<NameAndTypePair> requested_virtual_columns;
|
||||
|
||||
for (const auto & virtual_column : getVirtuals())
|
||||
{
|
||||
if (column == "_path")
|
||||
need_path_column = true;
|
||||
if (column == "_file")
|
||||
need_file_column = true;
|
||||
if (column_names_set.contains(virtual_column.name))
|
||||
requested_virtual_columns.push_back(virtual_column);
|
||||
}
|
||||
|
||||
std::shared_ptr<StorageS3Source::IteratorWrapper> iterator_wrapper = createFileIterator(client_auth, keys, is_key_with_globs, distributed_processing, local_context);
|
||||
@ -690,8 +690,18 @@ Pipe StorageS3::read(
|
||||
Block block_for_format;
|
||||
if (isColumnOriented())
|
||||
{
|
||||
auto fetch_columns = column_names;
|
||||
const auto & virtuals = getVirtuals();
|
||||
std::erase_if(
|
||||
fetch_columns,
|
||||
[&](const String & col)
|
||||
{ return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); });
|
||||
|
||||
if (fetch_columns.empty())
|
||||
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
|
||||
|
||||
columns_description = ColumnsDescription{
|
||||
storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()};
|
||||
storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()};
|
||||
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
|
||||
}
|
||||
else
|
||||
@ -704,8 +714,7 @@ Pipe StorageS3::read(
|
||||
for (size_t i = 0; i < num_streams; ++i)
|
||||
{
|
||||
pipes.emplace_back(std::make_shared<StorageS3Source>(
|
||||
need_path_column,
|
||||
need_file_column,
|
||||
requested_virtual_columns,
|
||||
format_name,
|
||||
getName(),
|
||||
block_for_format,
|
||||
@ -882,6 +891,8 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
|
||||
configuration.access_key_id = arg_value->as<ASTLiteral>()->value.safeGet<String>();
|
||||
else if (arg_name == "secret_access_key")
|
||||
configuration.secret_access_key = arg_value->as<ASTLiteral>()->value.safeGet<String>();
|
||||
else if (arg_name == "filename")
|
||||
configuration.url = std::filesystem::path(configuration.url) / arg_value->as<ASTLiteral>()->value.safeGet<String>();
|
||||
else
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].",
|
||||
@ -1081,9 +1092,7 @@ void registerStorageCOS(StorageFactory & factory)
|
||||
|
||||
NamesAndTypesList StorageS3::getVirtuals() const
|
||||
{
|
||||
return NamesAndTypesList{
|
||||
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
|
||||
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
|
||||
return virtual_columns;
|
||||
}
|
||||
|
||||
bool StorageS3::supportsPartitionBy() const
|
||||
|
@ -58,11 +58,10 @@ public:
|
||||
|
||||
using IteratorWrapper = std::function<String()>;
|
||||
|
||||
static Block getHeader(Block sample_block, bool with_path_column, bool with_file_column);
|
||||
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
|
||||
|
||||
StorageS3Source(
|
||||
bool need_path,
|
||||
bool need_file,
|
||||
const std::vector<NameAndTypePair> & requested_virtual_columns_,
|
||||
const String & format,
|
||||
String name_,
|
||||
const Block & sample_block,
|
||||
@ -102,8 +101,7 @@ private:
|
||||
std::unique_ptr<PullingPipelineExecutor> reader;
|
||||
/// onCancel and generate can be called concurrently
|
||||
std::mutex reader_mutex;
|
||||
bool with_file_column = false;
|
||||
bool with_path_column = false;
|
||||
std::vector<NameAndTypePair> requested_virtual_columns;
|
||||
std::shared_ptr<IteratorWrapper> file_iterator;
|
||||
size_t download_thread_num = 1;
|
||||
|
||||
@ -196,6 +194,7 @@ private:
|
||||
|
||||
ClientAuthentication client_auth;
|
||||
std::vector<String> keys;
|
||||
NamesAndTypesList virtual_columns;
|
||||
|
||||
String format_name;
|
||||
UInt64 max_single_read_retries;
|
||||
|
@ -467,7 +467,7 @@ std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
|
||||
|
||||
InterpreterSelectQuery fetch(
|
||||
getFetchColumnQuery(w_start, watermark),
|
||||
window_view_context,
|
||||
getContext(),
|
||||
getInnerStorage(),
|
||||
nullptr,
|
||||
SelectQueryOptions(QueryProcessingStage::FetchColumns));
|
||||
@ -509,11 +509,11 @@ std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
|
||||
return StorageBlocks::createStorage(blocks_id_global, required_columns, std::move(pipes), QueryProcessingStage::WithMergeableState);
|
||||
};
|
||||
|
||||
TemporaryTableHolder blocks_storage(window_view_context, creator);
|
||||
TemporaryTableHolder blocks_storage(getContext(), creator);
|
||||
|
||||
InterpreterSelectQuery select(
|
||||
getFinalQuery(),
|
||||
window_view_context,
|
||||
getContext(),
|
||||
blocks_storage.getTable(),
|
||||
blocks_storage.getTable()->getInMemoryMetadataPtr(),
|
||||
SelectQueryOptions(QueryProcessingStage::Complete));
|
||||
@ -617,8 +617,8 @@ std::shared_ptr<ASTCreateQuery> StorageWindowView::getInnerTableCreateQuery(
|
||||
|
||||
auto t_sample_block
|
||||
= InterpreterSelectQuery(
|
||||
inner_select_query, window_view_context, getParentStorage(), nullptr,
|
||||
SelectQueryOptions(QueryProcessingStage::WithMergeableState)) .getSampleBlock();
|
||||
inner_select_query, getContext(), getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::WithMergeableState))
|
||||
.getSampleBlock();
|
||||
|
||||
auto columns_list = std::make_shared<ASTExpressionList>();
|
||||
|
||||
@ -891,7 +891,7 @@ void StorageWindowView::updateMaxWatermark(UInt32 watermark)
|
||||
|
||||
inline void StorageWindowView::cleanup()
|
||||
{
|
||||
InterpreterAlterQuery alter_query(getCleanupQuery(), window_view_context);
|
||||
InterpreterAlterQuery alter_query(getCleanupQuery(), getContext());
|
||||
alter_query.execute();
|
||||
|
||||
std::lock_guard lock(fire_signal_mutex);
|
||||
@ -999,9 +999,6 @@ StorageWindowView::StorageWindowView(
|
||||
, WithContext(context_->getGlobalContext())
|
||||
, log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name)))
|
||||
{
|
||||
window_view_context = Context::createCopy(getContext());
|
||||
window_view_context->makeQueryContext();
|
||||
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
storage_metadata.setColumns(columns_);
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
@ -1089,11 +1086,11 @@ StorageWindowView::StorageWindowView(
|
||||
clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds();
|
||||
next_fire_signal = getWindowUpperBound(std::time(nullptr));
|
||||
|
||||
clean_cache_task = window_view_context->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); });
|
||||
clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); });
|
||||
if (is_proctime)
|
||||
fire_task = window_view_context->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireProc(); });
|
||||
fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireProc(); });
|
||||
else
|
||||
fire_task = window_view_context->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); });
|
||||
fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); });
|
||||
clean_cache_task->deactivate();
|
||||
fire_task->deactivate();
|
||||
}
|
||||
@ -1424,9 +1421,10 @@ Block & StorageWindowView::getHeader() const
|
||||
std::lock_guard lock(sample_block_lock);
|
||||
if (!sample_block)
|
||||
{
|
||||
sample_block = InterpreterSelectQuery(
|
||||
select_query->clone(), window_view_context, getParentStorage(), nullptr,
|
||||
SelectQueryOptions(QueryProcessingStage::Complete)).getSampleBlock();
|
||||
sample_block
|
||||
= InterpreterSelectQuery(
|
||||
select_query->clone(), getContext(), getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete))
|
||||
.getSampleBlock();
|
||||
/// convert all columns to full columns
|
||||
/// in case some of them are constant
|
||||
for (size_t i = 0; i < sample_block.columns(); ++i)
|
||||
|
@ -157,7 +157,6 @@ private:
|
||||
/// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY tumble(____timestamp, *)
|
||||
ASTPtr final_query;
|
||||
|
||||
ContextMutablePtr window_view_context;
|
||||
bool is_proctime{true};
|
||||
bool is_time_column_func_now;
|
||||
bool is_tumble; // false if is hop
|
||||
@ -182,7 +181,6 @@ private:
|
||||
|
||||
/// Mutex for the blocks and ready condition
|
||||
std::mutex mutex;
|
||||
std::mutex flush_table_mutex;
|
||||
std::shared_mutex fire_signal_mutex;
|
||||
mutable std::mutex sample_block_lock; /// Mutex to protect access to sample block and inner_blocks_query
|
||||
|
||||
|
22
src/Storages/getVirtualsForStorage.cpp
Normal file
@ -0,0 +1,22 @@
#include "getVirtualsForStorage.h"

namespace DB
{

NamesAndTypesList getVirtualsForStorage(const NamesAndTypesList & storage_columns_, const NamesAndTypesList & default_virtuals_)
{
    auto default_virtuals = default_virtuals_;
    auto storage_columns = storage_columns_;
    default_virtuals.sort();
    storage_columns.sort();

    NamesAndTypesList result_virtuals;
    std::set_difference(
        default_virtuals.begin(), default_virtuals.end(), storage_columns.begin(), storage_columns.end(),
        std::back_inserter(result_virtuals),
        [](const NameAndTypePair & lhs, const NameAndTypePair & rhs){ return lhs.name < rhs.name; });

    return result_virtuals;
}

}
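The helper above amounts to a name-based set difference: a default virtual column such as _path or _file survives only when the storage does not already define a column with that name. Both StorageHDFS and StorageS3 call it from their constructors so getVirtuals() can simply return the precomputed list. A minimal Python sketch of the same rule (illustrative names and tuple layout, not the C++ API):

def get_virtuals_for_storage(storage_columns, default_virtuals):
    # storage_columns / default_virtuals: iterables of (name, type) pairs; sketch only
    taken = {name for name, _ in storage_columns}
    # keep only the defaults that are not shadowed by a real column
    return [(name, type_) for name, type_ in default_virtuals if name not in taken]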
9
src/Storages/getVirtualsForStorage.h
Normal file
@ -0,0 +1,9 @@
#pragma once
#include <Core/NamesAndTypes.h>

namespace DB
{

NamesAndTypesList getVirtualsForStorage(const NamesAndTypesList & storage_columns_, const NamesAndTypesList & default_virtuals_);

}
@ -10,7 +10,7 @@ class TableFunctionHive : public ITableFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "hive";
|
||||
static constexpr auto storage_type_name = "hive";
|
||||
static constexpr auto storage_type_name = "Hive";
|
||||
std::string getName() const override { return name; }
|
||||
|
||||
bool hasStaticStructure() const override { return true; }
|
||||
|
@ -14,11 +14,16 @@ namespace ProfileEvents
|
||||
namespace DB
|
||||
{
|
||||
|
||||
AccessType ITableFunction::getSourceAccessType() const
|
||||
{
|
||||
return StorageFactory::instance().getSourceAccessType(getStorageTypeName());
|
||||
}
|
||||
|
||||
StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name,
|
||||
ColumnsDescription cached_columns, bool use_global_context) const
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::TableFunctionExecute);
|
||||
context->checkAccess(AccessType::CREATE_TEMPORARY_TABLE | StorageFactory::instance().getSourceAccessType(getStorageTypeName()));
|
||||
context->checkAccess(AccessType::CREATE_TEMPORARY_TABLE | getSourceAccessType());
|
||||
|
||||
auto context_to_use = use_global_context ? context->getGlobalContext() : context;
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Access/Common/AccessType.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
@ -71,7 +72,10 @@ public:
|
||||
private:
|
||||
virtual StoragePtr executeImpl(
|
||||
const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const = 0;
|
||||
|
||||
virtual const char * getStorageTypeName() const = 0;
|
||||
|
||||
virtual AccessType getSourceAccessType() const;
|
||||
};
|
||||
|
||||
using TableFunctionPtr = std::shared_ptr<ITableFunction>;
|
||||
|
@ -39,6 +39,8 @@ protected:
|
||||
|
||||
const char * getStorageTypeName() const override { return "HDFSCluster"; }
|
||||
|
||||
AccessType getSourceAccessType() const override { return AccessType::HDFS; }
|
||||
|
||||
ColumnsDescription getActualTableStructure(ContextPtr) const override;
|
||||
void parseArguments(const ASTPtr &, ContextPtr) override;
|
||||
|
||||
|
@ -18,6 +18,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
|
||||
}
|
||||
|
||||
void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr context)
|
||||
@ -29,6 +30,12 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr
|
||||
|
||||
auto args = function->arguments->children;
|
||||
|
||||
if (args.empty())
|
||||
{
|
||||
structure = "auto";
|
||||
return;
|
||||
}
|
||||
|
||||
if (args.size() != 1)
|
||||
throw Exception("Table function '" + getName() + "' requires exactly 1 argument: structure",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
@ -38,6 +45,16 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr
|
||||
|
||||
ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context) const
|
||||
{
|
||||
if (structure == "auto")
|
||||
{
|
||||
if (structure_hint.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"Table function '{}' was used without structure argument but structure could not be determined automatically. Please, "
|
||||
"provide structure manually",
|
||||
getName());
|
||||
return structure_hint;
|
||||
}
|
||||
return parseColumnsListFromString(structure, context);
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,8 @@ public:
|
||||
static constexpr auto name = "input";
|
||||
std::string getName() const override { return name; }
|
||||
bool hasStaticStructure() const override { return true; }
|
||||
bool needStructureHint() const override { return true; }
|
||||
void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; }
|
||||
|
||||
private:
|
||||
StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override;
|
||||
@ -25,6 +27,7 @@ private:
|
||||
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
|
||||
|
||||
String structure;
|
||||
ColumnsDescription structure_hint;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <Storages/StorageS3.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include "registerTableFunctions.h"
|
||||
#include <filesystem>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -37,6 +38,8 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar
|
||||
s3_configuration.access_key_id = arg_value->as<ASTLiteral>()->value.safeGet<String>();
|
||||
else if (arg_name == "secret_access_key")
|
||||
s3_configuration.secret_access_key = arg_value->as<ASTLiteral>()->value.safeGet<String>();
|
||||
else if (arg_name == "filename")
|
||||
s3_configuration.url = std::filesystem::path(s3_configuration.url) / arg_value->as<ASTLiteral>()->value.safeGet<String>();
|
||||
else
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message);
|
||||
}
|
||||
|
@ -45,6 +45,8 @@ protected:
|
||||
|
||||
const char * getStorageTypeName() const override { return "S3Cluster"; }
|
||||
|
||||
AccessType getSourceAccessType() const override { return AccessType::S3; }
|
||||
|
||||
ColumnsDescription getActualTableStructure(ContextPtr) const override;
|
||||
void parseArguments(const ASTPtr &, ContextPtr) override;
|
||||
|
||||
|
@ -264,7 +264,7 @@ def main():
|
||||
version_type = "stable"
|
||||
official_flag = True
|
||||
|
||||
update_version_local(REPO_COPY, version, version_type)
|
||||
update_version_local(version, version_type)
|
||||
|
||||
logging.info("Updated local files with version")
|
||||
|
||||
|
@ -397,17 +397,19 @@ def main():
|
||||
|
||||
images_dict = get_images_dict(GITHUB_WORKSPACE, "docker/images.json")
|
||||
|
||||
pr_info = PRInfo()
|
||||
if args.all:
|
||||
pr_info = PRInfo()
|
||||
pr_info.changed_files = set(images_dict.keys())
|
||||
elif args.image_path:
|
||||
pr_info = PRInfo()
|
||||
pr_info.changed_files = set(i for i in args.image_path)
|
||||
else:
|
||||
pr_info = PRInfo(need_changed_files=True)
|
||||
pr_info.fetch_changed_files()
|
||||
|
||||
changed_images = get_changed_docker_images(pr_info, images_dict)
|
||||
logging.info("Has changed images %s", ", ".join([im.path for im in changed_images]))
|
||||
if changed_images:
|
||||
logging.info(
|
||||
"Has changed images: %s", ", ".join([im.path for im in changed_images])
|
||||
)
|
||||
|
||||
image_versions, result_version = gen_versions(pr_info, args.suffix)
|
||||
|
||||
|
@ -56,18 +56,20 @@ def get_images_with_versions(
|
||||
for i in range(10):
|
||||
try:
|
||||
logging.info("Pulling image %s", docker_image)
|
||||
latest_error = subprocess.check_output(
|
||||
subprocess.check_output(
|
||||
f"docker pull {docker_image}",
|
||||
stderr=subprocess.STDOUT,
|
||||
shell=True,
|
||||
)
|
||||
break
|
||||
except Exception as ex:
|
||||
latest_error = ex
|
||||
time.sleep(i * 3)
|
||||
logging.info("Got execption pulling docker %s", ex)
|
||||
else:
|
||||
raise Exception(
|
||||
f"Cannot pull dockerhub for image docker pull {docker_image} because of {latest_error}"
|
||||
"Cannot pull dockerhub for image docker pull "
|
||||
f"{docker_image} because of {latest_error}"
|
||||
)
|
||||
|
||||
return docker_images
|
||||
|
360
tests/ci/docker_server.py
Normal file
@ -0,0 +1,360 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# here
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from os import path as p, makedirs
|
||||
from typing import List, Tuple
|
||||
|
||||
from github import Github
|
||||
|
||||
from build_check import get_release_or_pr
|
||||
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
|
||||
from commit_status_helper import post_commit_status
|
||||
from docker_images_check import DockerImage
|
||||
from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET
|
||||
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
|
||||
from pr_info import PRInfo
|
||||
from s3_helper import S3Helper
|
||||
from stopwatch import Stopwatch
|
||||
from upload_result_helper import upload_results
|
||||
from version_helper import (
|
||||
ClickHouseVersion,
|
||||
get_tagged_versions,
|
||||
get_version_from_repo,
|
||||
get_version_from_string,
|
||||
)
|
||||
|
||||
TEMP_PATH = p.join(RUNNER_TEMP, "docker_images_check")
|
||||
BUCKETS = {"amd64": "package_release", "arm64": "package_aarch64"}
|
||||
|
||||
|
||||
class DelOS(argparse.Action):
    def __call__(self, _, namespace, __, option_string=None):
        no_build = self.dest[3:] if self.dest.startswith("no_") else self.dest
        if no_build in namespace.os:
            namespace.os.remove(no_build)

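DelOS is a small argparse action: --no-ubuntu and --no-alpine (declared with nargs=0 further down) strip the matching entry from the default --os list instead of storing a value. Illustrative effect, assuming no other handling of --os:

# docker_server.py                          -> args.os == ["ubuntu", "alpine"]
# docker_server.py --no-ubuntu              -> args.os == ["alpine"]
# docker_server.py --no-ubuntu --no-alpine  -> args.os == []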
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
description="A program to build clickhouse-server image, both alpine and "
|
||||
"ubuntu versions",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--version",
|
||||
type=version_arg,
|
||||
default=get_version_from_repo().string,
|
||||
help="a version to build",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--release-type",
|
||||
type=str,
|
||||
choices=("auto", "latest", "major", "minor", "patch", "head"),
|
||||
default="head",
|
||||
help="version part that will be updated when '--version' is set; "
|
||||
"'auto' is a special case, it will get versions from github and detect the "
|
||||
"release type (latest, major, minor or patch) automatically",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--image-path",
|
||||
type=str,
|
||||
default="docker/server",
|
||||
help="a path to docker context directory",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--image-repo",
|
||||
type=str,
|
||||
default="clickhouse/clickhouse-server",
|
||||
help="image name on docker hub",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--bucket-prefix",
|
||||
help="if set, then is used as source for deb and tgz files",
|
||||
)
|
||||
parser.add_argument("--reports", default=True, help=argparse.SUPPRESS)
|
||||
parser.add_argument(
|
||||
"--no-reports",
|
||||
action="store_false",
|
||||
dest="reports",
|
||||
default=argparse.SUPPRESS,
|
||||
help="don't push reports to S3 and github",
|
||||
)
|
||||
parser.add_argument("--push", default=True, help=argparse.SUPPRESS)
|
||||
parser.add_argument(
|
||||
"--no-push-images",
|
||||
action="store_false",
|
||||
dest="push",
|
||||
default=argparse.SUPPRESS,
|
||||
help="don't push images to docker hub",
|
||||
)
|
||||
parser.add_argument("--os", default=["ubuntu", "alpine"], help=argparse.SUPPRESS)
|
||||
parser.add_argument(
|
||||
"--no-ubuntu",
|
||||
action=DelOS,
|
||||
nargs=0,
|
||||
default=argparse.SUPPRESS,
|
||||
help="don't build ubuntu image",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-alpine",
|
||||
action=DelOS,
|
||||
nargs=0,
|
||||
default=argparse.SUPPRESS,
|
||||
help="don't build alpine image",
|
||||
)
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def version_arg(version: str) -> ClickHouseVersion:
|
||||
try:
|
||||
return get_version_from_string(version)
|
||||
except ValueError as e:
|
||||
raise argparse.ArgumentTypeError(e)
|
||||
|
||||
|
||||
def auto_release_type(version: ClickHouseVersion, release_type: str) -> str:
    if release_type != "auto":
        return release_type

    git_versions = get_tagged_versions()
    reference_version = git_versions[0]
    for i in reversed(range(len(git_versions))):
        if git_versions[i] < version:
            if i == len(git_versions) - 1:
                return "latest"
            reference_version = git_versions[i + 1]
            break

    if version.major < reference_version.major:
        return "major"
    if version.minor < reference_version.minor:
        return "minor"
    if version.patch < reference_version.patch:
        return "patch"

    raise ValueError(
        "Release type 'tweak' is not supported for "
        f"{version.string} < {reference_version.string}"
    )


def gen_tags(version: ClickHouseVersion, release_type: str) -> List[str]:
    """
    22.2.2.2 + latest:
    - latest
    - 22
    - 22.2
    - 22.2.2
    - 22.2.2.2
    22.2.2.2 + major:
    - 22
    - 22.2
    - 22.2.2
    - 22.2.2.2
    22.2.2.2 + minor:
    - 22.2
    - 22.2.2
    - 22.2.2.2
    22.2.2.2 + patch:
    - 22.2.2
    - 22.2.2.2
    22.2.2.2 + head:
    - head
    """
    parts = version.string.split(".")
    tags = []
    if release_type == "latest":
        tags.append(release_type)
        for i in range(len(parts)):
            tags.append(".".join(parts[: i + 1]))
    elif release_type == "major":
        for i in range(len(parts)):
            tags.append(".".join(parts[: i + 1]))
    elif release_type == "minor":
        for i in range(1, len(parts)):
            tags.append(".".join(parts[: i + 1]))
    elif release_type == "patch":
        for i in range(2, len(parts)):
            tags.append(".".join(parts[: i + 1]))
    elif release_type == "head":
        tags.append(release_type)
    else:
        raise ValueError(f"{release_type} is not valid release part")
    return tags

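The docstring is the contract: gen_tags expands a version into progressively longer dotted prefixes depending on the release type. A hypothetical interactive check (assumes tests/ci is importable from a git checkout, or with git_helper.Git patched as in the unit test further down):

>>> from version_helper import get_version_from_string
>>> from docker_server import gen_tags
>>> gen_tags(get_version_from_string("22.2.2.2"), "minor")
['22.2', '22.2.2', '22.2.2.2']
>>> gen_tags(get_version_from_string("22.2.2.2"), "head")
['head']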
def buildx_args(bucket_prefix: str, arch: str) -> List[str]:
|
||||
args = [f"--platform=linux/{arch}", f"--label=build-url={GITHUB_RUN_URL}"]
|
||||
if bucket_prefix:
|
||||
url = p.join(bucket_prefix, BUCKETS[arch]) # to prevent a double //
|
||||
args.append(f"--build-arg=REPOSITORY='{url}'")
|
||||
args.append(f"--build-arg=deb_location_url='{url}'")
|
||||
return args
|
||||
|
||||
|
||||
def build_and_push_image(
|
||||
image: DockerImage,
|
||||
push: bool,
|
||||
bucket_prefix: str,
|
||||
os: str,
|
||||
tag: str,
|
||||
version: ClickHouseVersion,
|
||||
) -> List[Tuple[str, str]]:
|
||||
result = []
|
||||
if os != "ubuntu":
|
||||
tag += f"-{os}"
|
||||
init_args = ["docker", "buildx", "build"]
|
||||
if push:
|
||||
init_args.append("--push")
|
||||
init_args.append("--output=type=image,push-by-digest=true")
|
||||
init_args.append(f"--tag={image.repo}")
|
||||
else:
|
||||
init_args.append("--output=type=docker")
|
||||
|
||||
# `docker buildx build --load` does not support multiple images currently
|
||||
# images must be built separately and merged together with `docker manifest`
|
||||
digests = []
|
||||
for arch in BUCKETS:
|
||||
arch_tag = f"{tag}-{arch}"
|
||||
metadata_path = p.join(TEMP_PATH, arch_tag)
|
||||
dockerfile = p.join(image.full_path, f"Dockerfile.{os}")
|
||||
cmd_args = list(init_args)
|
||||
cmd_args.extend(buildx_args(bucket_prefix, arch))
|
||||
if not push:
|
||||
cmd_args.append(f"--tag={image.repo}:{arch_tag}")
|
||||
cmd_args.extend(
|
||||
[
|
||||
f"--metadata-file={metadata_path}",
|
||||
f"--build-arg=VERSION='{version.string}'",
|
||||
"--progress=plain",
|
||||
f"--file={dockerfile}",
|
||||
image.full_path,
|
||||
]
|
||||
)
|
||||
cmd = " ".join(cmd_args)
|
||||
logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd)
|
||||
with subprocess.Popen(
|
||||
cmd,
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT,
|
||||
stdout=subprocess.PIPE,
|
||||
universal_newlines=True,
|
||||
) as process:
|
||||
for line in process.stdout: # type: ignore
|
||||
print(line, end="")
|
||||
retcode = process.wait()
|
||||
if retcode != 0:
|
||||
result.append((f"{image.repo}:{tag}-{arch}", "FAIL"))
|
||||
return result
|
||||
result.append((f"{image.repo}:{tag}-{arch}", "OK"))
|
||||
with open(metadata_path, "rb") as m:
|
||||
metadata = json.load(m)
|
||||
digests.append(metadata["containerimage.digest"])
|
||||
if push:
|
||||
cmd = (
|
||||
"docker buildx imagetools create "
|
||||
f"--tag {image.repo}:{tag} {' '.join(digests)}"
|
||||
)
|
||||
logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd)
|
||||
with subprocess.Popen(
|
||||
cmd,
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT,
|
||||
stdout=subprocess.PIPE,
|
||||
universal_newlines=True,
|
||||
) as process:
|
||||
for line in process.stdout: # type: ignore
|
||||
print(line, end="")
|
||||
retcode = process.wait()
|
||||
if retcode != 0:
|
||||
result.append((f"{image.repo}:{tag}", "FAIL"))
|
||||
else:
|
||||
logging.info(
|
||||
"Merging is available only on push, separate %s images are created",
|
||||
f"{image.repo}:{tag}-$arch",
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def main():
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
stopwatch = Stopwatch()
|
||||
makedirs(TEMP_PATH, exist_ok=True)
|
||||
|
||||
args = parse_args()
|
||||
image = DockerImage(args.image_path, args.image_repo, False)
|
||||
args.release_type = auto_release_type(args.version, args.release_type)
|
||||
tags = gen_tags(args.version, args.release_type)
|
||||
NAME = f"Docker image {image.repo} building check (actions)"
|
||||
pr_info = None
|
||||
if CI:
|
||||
pr_info = PRInfo()
|
||||
release_or_pr = get_release_or_pr(pr_info, {"package_type": ""}, args.version)
|
||||
args.bucket_prefix = (
|
||||
f"https://s3.amazonaws.com/{S3_BUILDS_BUCKET}/"
|
||||
f"{release_or_pr}/{pr_info.sha}"
|
||||
)
|
||||
|
||||
if args.push:
|
||||
subprocess.check_output( # pylint: disable=unexpected-keyword-arg
|
||||
"docker login --username 'robotclickhouse' --password-stdin",
|
||||
input=get_parameter_from_ssm("dockerhub_robot_password"),
|
||||
encoding="utf-8",
|
||||
shell=True,
|
||||
)
|
||||
NAME = f"Docker image {image.repo} build and push (actions)"
|
||||
|
||||
logging.info("Following tags will be created: %s", ", ".join(tags))
|
||||
status = "success"
|
||||
test_results = [] # type: List[Tuple[str, str]]
|
||||
for os in args.os:
|
||||
for tag in tags:
|
||||
test_results.extend(
|
||||
build_and_push_image(
|
||||
image, args.push, args.bucket_prefix, os, tag, args.version
|
||||
)
|
||||
)
|
||||
if test_results[-1][1] != "OK":
|
||||
status = "failure"
|
||||
|
||||
pr_info = pr_info or PRInfo()
|
||||
s3_helper = S3Helper("https://s3.amazonaws.com")
|
||||
|
||||
url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)
|
||||
|
||||
print(f"::notice ::Report url: {url}")
|
||||
print(f'::set-output name=url_output::"{url}"')
|
||||
|
||||
if not args.reports:
|
||||
return
|
||||
|
||||
description = f"Processed tags: {', '.join(tags)}"
|
||||
|
||||
if len(description) >= 140:
|
||||
description = description[:136] + "..."
|
||||
|
||||
gh = Github(get_best_robot_token())
|
||||
post_commit_status(gh, pr_info.sha, NAME, description, status, url)
|
||||
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
test_results,
|
||||
status,
|
||||
stopwatch.duration_seconds,
|
||||
stopwatch.start_time_str,
|
||||
url,
|
||||
NAME,
|
||||
)
|
||||
ch_helper = ClickHouseHelper()
|
||||
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -2,12 +2,16 @@
|
||||
|
||||
import os
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from env_helper import GITHUB_RUN_URL
|
||||
from pr_info import PRInfo
|
||||
import docker_images_check as di
|
||||
|
||||
with patch("git_helper.Git"):
|
||||
from version_helper import get_version_from_string, get_tagged_versions
|
||||
import docker_server as ds
|
||||
|
||||
# di.logging.basicConfig(level=di.logging.INFO)
|
||||
|
||||
|
||||
@ -221,5 +225,46 @@ class TestDockerImageCheck(unittest.TestCase):
|
||||
self.assertEqual(results, expected)
|
||||
|
||||
|
||||
class TestDockerServer(unittest.TestCase):
|
||||
def test_gen_tags(self):
|
||||
version = get_version_from_string("22.2.2.2")
|
||||
cases = (
|
||||
("latest", ["latest", "22", "22.2", "22.2.2", "22.2.2.2"]),
|
||||
("major", ["22", "22.2", "22.2.2", "22.2.2.2"]),
|
||||
("minor", ["22.2", "22.2.2", "22.2.2.2"]),
|
||||
("patch", ["22.2.2", "22.2.2.2"]),
|
||||
("head", ["head"]),
|
||||
)
|
||||
for case in cases:
|
||||
release_type = case[0]
|
||||
self.assertEqual(case[1], ds.gen_tags(version, release_type))
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
ds.gen_tags(version, "auto")
|
||||
|
||||
@patch("docker_server.get_tagged_versions")
|
||||
def test_auto_release_type(self, mock_tagged_versions: MagicMock):
|
||||
mock_tagged_versions.return_value = [
|
||||
get_version_from_string("1.1.1.1"),
|
||||
get_version_from_string("1.2.1.1"),
|
||||
get_version_from_string("2.1.1.1"),
|
||||
get_version_from_string("2.2.1.1"),
|
||||
get_version_from_string("2.2.2.1"),
|
||||
]
|
||||
cases = (
|
||||
(get_version_from_string("1.0.1.1"), "minor"),
|
||||
(get_version_from_string("1.1.2.1"), "minor"),
|
||||
(get_version_from_string("1.3.1.1"), "major"),
|
||||
(get_version_from_string("2.1.2.1"), "minor"),
|
||||
(get_version_from_string("2.2.1.3"), "patch"),
|
||||
(get_version_from_string("2.2.3.1"), "latest"),
|
||||
(get_version_from_string("2.3.1.1"), "latest"),
|
||||
)
|
||||
_ = get_tagged_versions()
|
||||
for case in cases:
|
||||
release = ds.auto_release_type(case[0], "auto")
|
||||
self.assertEqual(case[1], release)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
@ -1,7 +1,11 @@
|
||||
import os
|
||||
from os import path as p
|
||||
|
||||
module_dir = p.abspath(p.dirname(__file__))
|
||||
git_root = p.abspath(p.join(module_dir, "..", ".."))
|
||||
|
||||
CI = bool(os.getenv("CI"))
|
||||
TEMP_PATH = os.getenv("TEMP_PATH", os.path.abspath("."))
|
||||
TEMP_PATH = os.getenv("TEMP_PATH", module_dir)
|
||||
|
||||
CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH)
|
||||
CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN")
|
||||
@ -9,11 +13,11 @@ GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH")
|
||||
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
|
||||
GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0")
|
||||
GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com")
|
||||
GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))
|
||||
GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", git_root)
|
||||
GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
|
||||
IMAGES_PATH = os.getenv("IMAGES_PATH")
|
||||
REPORTS_PATH = os.getenv("REPORTS_PATH", "./reports")
|
||||
REPO_COPY = os.getenv("REPO_COPY", os.path.abspath("../../"))
|
||||
RUNNER_TEMP = os.getenv("RUNNER_TEMP", os.path.abspath("./tmp"))
|
||||
REPORTS_PATH = os.getenv("REPORTS_PATH", p.abspath(p.join(module_dir, "./reports")))
|
||||
REPO_COPY = os.getenv("REPO_COPY", git_root)
|
||||
RUNNER_TEMP = os.getenv("RUNNER_TEMP", p.abspath(p.join(module_dir, "./tmp")))
|
||||
S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds")
|
||||
S3_TEST_REPORTS_BUCKET = os.getenv("S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports")
|
||||
|
@ -3,7 +3,7 @@ import argparse
import os.path as p
import re
import subprocess
from typing import Optional
from typing import List, Optional

# ^ and $ match subline in `multiple\nlines`
# \A and \Z match only start and end of the whole string
@ -89,7 +89,7 @@ class Git:
            self.run(f"git rev-list {self.latest_tag}..HEAD --count")
        )

    def _check_tag(self, value: str):
    def check_tag(self, value: str):
        if value == "":
            return
        if not self._tag_pattern.match(value):
@ -101,7 +101,7 @@ class Git:

    @latest_tag.setter
    def latest_tag(self, value: str):
        self._check_tag(value)
        self.check_tag(value)
        self._latest_tag = value

    @property
@ -110,7 +110,7 @@ class Git:

    @new_tag.setter
    def new_tag(self, value: str):
        self._check_tag(value)
        self.check_tag(value)
        self._new_tag = value

    @property
@ -122,3 +122,6 @@ class Git:

        version = self.latest_tag.split("-", maxsplit=1)[0]
        return int(version.split(".")[-1]) + self.commits_since_tag

    def get_tags(self) -> List[str]:
        return self.run("git tag").split()
@ -1,10 +1,8 @@
#!/usr/bin/env python3
import datetime
import logging
import os.path as p
import subprocess
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from typing import Dict, Tuple, Union
from typing import Dict, List, Tuple, Union

from git_helper import Git, removeprefix

@ -49,12 +47,16 @@ class ClickHouseVersion:
        patch: Union[int, str],
        revision: Union[int, str],
        git: Git,
        tweak: str = None,
    ):
        self._major = int(major)
        self._minor = int(minor)
        self._patch = int(patch)
        self._revision = int(revision)
        self._git = git
        self._tweak = None
        if tweak is not None:
            self._tweak = int(tweak)
        self._describe = ""

    def update(self, part: str) -> "ClickHouseVersion":
@ -89,7 +91,7 @@ class ClickHouseVersion:

    @property
    def tweak(self) -> int:
        return self._git.tweak
        return self._tweak or self._git.tweak

    @property
    def revision(self) -> int:
@ -129,6 +131,25 @@ class ClickHouseVersion:
            raise ValueError(f"version type {version_type} not in {VersionType.VALID}")
        self._describe = f"v{self.string}-{version_type}"

    def __eq__(self, other) -> bool:
        if not isinstance(self, type(other)):
            return NotImplemented
        return (
            self.major == other.major
            and self.minor == other.minor
            and self.patch == other.patch
            and self.tweak == other.tweak
        )

    def __lt__(self, other: "ClickHouseVersion") -> bool:
        for part in ("major", "minor", "patch", "tweak"):
            if getattr(self, part) < getattr(other, part):
                return True
            elif getattr(self, part) > getattr(other, part):
                return False

        return False


class VersionType:
    LTS = "lts"
@ -138,6 +159,14 @@ class VersionType:
    VALID = (TESTING, PRESTABLE, STABLE, LTS)


def validate_version(version: str):
    parts = version.split(".")
    if len(parts) != 4:
        raise ValueError(f"{version} does not contain 4 parts")
    for part in parts:
        int(part)


def get_abs_path(path: str) -> str:
    return p.abspath(p.join(git.root, path))

@ -176,6 +205,29 @@ def get_version_from_repo(
    )


def get_version_from_string(version: str) -> ClickHouseVersion:
    validate_version(version)
    parts = version.split(".")
    return ClickHouseVersion(parts[0], parts[1], parts[2], -1, git, parts[3])


def get_version_from_tag(tag: str) -> ClickHouseVersion:
    git.check_tag(tag)
    tag = tag[1:].split("-")[0]
    return get_version_from_string(tag)


def get_tagged_versions() -> List[ClickHouseVersion]:
    versions = []
    for tag in git.get_tags():
        try:
            version = get_version_from_tag(tag)
            versions.append(version)
        except Exception:
            continue
    return sorted(versions)

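With __eq__ and __lt__ comparing (major, minor, patch, tweak) in order, sorted() in get_tagged_versions yields tags in ascending release order, which is what the Docker tag selection relies on. A small illustrative snippet (hypothetical values; assuming version_helper is importable from inside the repository, as the unit test above does, and that .string renders the four parts):

from version_helper import get_version_from_string

# __lt__ walks major, minor, patch, tweak, so sorted() gives ascending
# release order regardless of the input order.
versions = [get_version_from_string(s) for s in ("2.2.2.1", "1.2.1.1", "2.2.1.1")]
ordered = sorted(versions)
assert [v.string for v in ordered] == ["1.2.1.1", "2.2.1.1", "2.2.2.1"]
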
def update_cmake_version(
    version: ClickHouseVersion,
    versions_path: str = FILE_WITH_VERSION_PATH,
@ -185,22 +237,6 @@ def update_cmake_version(
        f.write(VERSIONS_TEMPLATE.format_map(version.as_dict()))


def _update_changelog(repo_path: str, version: ClickHouseVersion):
    cmd = """sed \
    -e "s/[@]VERSION_STRING[@]/{version_str}/g" \
    -e "s/[@]DATE[@]/{date}/g" \
    -e "s/[@]AUTHOR[@]/clickhouse-release/g" \
    -e "s/[@]EMAIL[@]/clickhouse-release@yandex-team.ru/g" \
    < {in_path} > {changelog_path}
    """.format(
        version_str=version.string,
        date=datetime.datetime.now().strftime("%a, %d %b %Y %H:%M:%S") + " +0300",
        in_path=p.join(repo_path, CHANGELOG_IN_PATH),
        changelog_path=p.join(repo_path, CHANGELOG_PATH),
    )
    subprocess.check_call(cmd, shell=True)


def update_contributors(
    relative_contributors_path: str = GENERATED_CONTRIBUTORS, force: bool = False
):
@ -225,22 +261,10 @@ def update_contributors(
        cfd.write(content)


def _update_dockerfile(repo_path: str, version: ClickHouseVersion):
    version_str_for_docker = ".".join(
        [str(version.major), str(version.minor), str(version.patch), "*"]
    )
    cmd = "ls -1 {path}/docker/*/Dockerfile | xargs sed -i -r -e 's/ARG version=.+$/ARG version='{ver}'/'".format(
        path=repo_path, ver=version_str_for_docker
    )
    subprocess.check_call(cmd, shell=True)


def update_version_local(repo_path, version, version_type="testing"):
def update_version_local(version, version_type="testing"):
    update_contributors()
    version.with_description(version_type)
    update_cmake_version(version)
    _update_changelog(repo_path, version)
    _update_dockerfile(repo_path, version)


def main():
@ -22,5 +22,11 @@
            </header>
        </headers>
    </url_with_headers>
    <s3_conn>
        <url>http://localhost:11111/test/</url>
        <access_key_id>test</access_key_id>
        <secret_access_key>testtest</secret_access_key>
        <structure>auto</structure>
    </s3_conn>
</named_collections>
</clickhouse>
@ -44,7 +44,7 @@ def dotnet_container():
            "-f",
            docker_compose,
            "up",
            "--no-recreate",
            "--force-recreate",
            "-d",
            "--no-build",
        ]
@ -55,7 +55,7 @@ def golang_container():
            "-f",
            docker_compose,
            "up",
            "--no-recreate",
            "--force-recreate",
            "-d",
            "--no-build",
        ]
@ -82,7 +82,7 @@ def php_container():
            "-f",
            docker_compose,
            "up",
            "--no-recreate",
            "--force-recreate",
            "-d",
            "--no-build",
        ]
@ -109,7 +109,7 @@ def nodejs_container():
            "-f",
            docker_compose,
            "up",
            "--no-recreate",
            "--force-recreate",
            "-d",
            "--no-build",
        ]
@ -136,7 +136,7 @@ def java_container():
            "-f",
            docker_compose,
            "up",
            "--no-recreate",
            "--force-recreate",
            "-d",
            "--no-build",
        ]
@ -56,7 +56,7 @@ def psql_client():
            "-f",
            docker_compose,
            "up",
            "--no-recreate",
            "--force-recreate",
            "-d",
            "--build",
        ]
@ -99,7 +99,7 @@ def java_container():
            "-f",
            docker_compose,
            "up",
            "--no-recreate",
            "--force-recreate",
            "-d",
            "--build",
        ]
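These fixtures switch the compose invocation from "--no-recreate" to "--force-recreate", so leftover client containers from a previous run are recreated instead of being silently reused. A minimal sketch of the shared invocation pattern (the helper name is illustrative, not taken from the repository):

import subprocess

def bring_up_clients(docker_compose: str) -> None:
    # Recreate the client containers even if old ones are still around,
    # so every test run starts from a known state; the services stay
    # detached (-d) and are not rebuilt here (--no-build).
    subprocess.check_call(
        [
            "docker-compose",
            "-f",
            docker_compose,
            "up",
            "--force-recreate",
            "-d",
            "--no-build",
        ]
    )
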
@ -129,6 +129,7 @@ def test_quota_from_users_xml():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -349,6 +350,7 @@ def test_tracking_quota():
                "\\N",
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -454,7 +456,7 @@ def test_exceed_quota():
        ]
    )
    system_quota_limits(
        [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]
        [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N"]]
    )
    system_quota_usage(
        [
@ -545,6 +547,7 @@ def test_exceed_quota():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -634,6 +637,7 @@ def test_add_remove_interval():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -695,6 +699,7 @@ def test_add_remove_interval():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ],
            [
                "myQuota",
@ -709,6 +714,7 @@ def test_add_remove_interval():
                "\\N",
                20000,
                120,
                "\\N",
            ],
        ]
    )
@ -842,6 +848,7 @@ def test_add_remove_interval():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -1003,6 +1010,7 @@ def test_add_remove_interval():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -1064,6 +1072,7 @@ def test_add_remove_quota():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -1136,6 +1145,7 @@ def test_add_remove_quota():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ],
            [
                "myQuota2",
@ -1150,6 +1160,7 @@ def test_add_remove_quota():
                4000,
                400000,
                60,
                "\\N",
            ],
            [
                "myQuota2",
@ -1164,6 +1175,7 @@ def test_add_remove_quota():
                "\\N",
                "\\N",
                1800,
                "\\N",
            ],
        ]
    )
@ -1226,6 +1238,7 @@ def test_add_remove_quota():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -1294,6 +1307,7 @@ def test_add_remove_quota():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -1356,6 +1370,7 @@ def test_reload_users_xml_by_timer():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -1382,7 +1397,7 @@ def test_reload_users_xml_by_timer():
    assert_eq_with_retry(
        instance,
        "SELECT * FROM system.quota_limits",
        [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]],
        [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N", "\\N"]],
    )

@ -1481,15 +1496,15 @@ def test_dcl_management():
        == "CREATE QUOTA qA FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default\n"
    )
    assert re.match(
        "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n"
        "qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
        "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\n"
        "qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n",
        instance.query("SHOW QUOTA"),
    )

    instance.query("SELECT * from test_table")
    assert re.match(
        "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n"
        "qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
        "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\\t0\\t\\\\N\n"
        "qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n",
        instance.query("SHOW QUOTA"),
    )

@ -1503,7 +1518,7 @@ def test_dcl_management():

    instance.query("SELECT * from test_table")
    assert re.match(
        "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n",
        "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n",
        instance.query("SHOW QUOTA"),
    )

@ -1519,7 +1534,7 @@ def test_dcl_management():

    instance.query("SELECT * from test_table")
    assert re.match(
        "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n",
        "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\\t0\\t\\\\N\n",
        instance.query("SHOW QUOTA"),
    )

@ -1563,6 +1578,7 @@ def test_query_inserts():
                1000,
                "\\N",
                "\\N",
                "\\N",
            ]
        ]
    )
@ -554,6 +554,26 @@ def test_insert_select_schema_inference(started_cluster):
    assert int(result) == 1


def test_virtual_columns_2(started_cluster):
    hdfs_api = started_cluster.hdfs_api

    table_function = (
        f"hdfs('hdfs://hdfs1:9000/parquet_2', 'Parquet', 'a Int32, b String')"
    )
    node1.query(f"insert into table function {table_function} SELECT 1, 'kek'")

    result = node1.query(f"SELECT _path FROM {table_function}")
    assert result.strip() == "hdfs://hdfs1:9000/parquet_2"

    table_function = (
        f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')"
    )
    node1.query(f"insert into table function {table_function} SELECT 1, 'kek'")

    result = node1.query(f"SELECT _path FROM {table_function}")
    assert result.strip() == "kek"


if __name__ == "__main__":
    cluster.start()
    input("Cluster created, press any key to destroy...")