Merge branch 'master' into format-settings-parsing

Alexey Milovidov 2024-07-06 20:28:19 +02:00
commit 8719ef5a50
48 changed files with 1112 additions and 190 deletions

View File

@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING)
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
endif ()
message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}")
message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}")
endif ()

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.6.1.4423"
ARG VERSION="24.6.2.17"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

docker/reqgenerator.py (new file)
View File

@ -0,0 +1,47 @@
#!/usr/bin/env python3
# To run this script you must have docker and the pipdeptree python package installed.
#
import subprocess
import os
import sys


def build_docker_deps(image_name, imagedir):
    # Run pipdeptree inside the image and dump the frozen dependency list into requirements.txt.
    cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt"""
    subprocess.check_call(cmd, shell=True)


def check_docker_file_install_with_pip(filepath):
    # Return the image name from the "docker build" hint and whether the Dockerfile installs packages with pip.
    image_name = None
    with open(filepath, "r") as f:
        for line in f:
            if "docker build" in line:
                arr = line.split(" ")
                if len(arr) > 4:
                    image_name = arr[4]
            if "pip3 install" in line or "pip install" in line:
                return image_name, True
    return image_name, False


def process_affected_images(images_dir):
    # Walk the images directory and regenerate requirements.txt for every image that uses pip.
    for root, _dirs, files in os.walk(images_dir):
        for f in files:
            if f == "Dockerfile":
                docker_file_path = os.path.join(root, f)
                print("Checking image on path", docker_file_path)
                image_name, has_pip = check_docker_file_install_with_pip(
                    docker_file_path
                )
                if has_pip:
                    print("Found pip in", image_name)
                    try:
                        build_docker_deps(image_name, root)
                    except Exception as ex:
                        print(ex)
                else:
                    print("Pip not found in", docker_file_path)


process_affected_images(sys.argv[1])

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.6.1.4423"
ARG VERSION="24.6.2.17"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.6.1.4423"
ARG VERSION="24.6.2.17"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
#docker-official-library:off

View File

@ -19,10 +19,7 @@ RUN apt-get update \
odbcinst \
psmisc \
python3 \
python3-lxml \
python3-pip \
python3-requests \
python3-termcolor \
unixodbc \
pv \
jq \
@ -31,7 +28,8 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
@ -39,6 +37,10 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake
# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot pick up the libraries from the default install path.
# This is a very dirty workaround; it would be better to build the compiler and LLVM ourselves and use that. Details: https://github.com/llvm/llvm-project/issues/95792
RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu
ARG CCACHE_VERSION=4.6.1
RUN mkdir /tmp/ccache \
&& cd /tmp/ccache \

View File

@ -0,0 +1,41 @@
Jinja2==3.1.3
MarkupSafe==2.1.5
PyJWT==2.3.0
PyYAML==6.0.1
Pygments==2.11.2
SecretStorage==3.3.1
blinker==1.4
certifi==2020.6.20
chardet==4.0.0
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
idna==3.3
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
lxml==4.8.0
more-itertools==8.10.0
numpy==1.26.3
oauthlib==3.2.0
packaging==24.1
pandas==1.5.3
pip==24.1.1
pipdeptree==2.23.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
python-dateutil==2.9.0.post0
pytz==2024.1
requests==2.32.3
scipy==1.12.0
setuptools==59.6.0
six==1.16.0
termcolor==1.1.0
urllib3==1.26.5
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -31,7 +31,8 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install Jinja2
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
COPY * /

View File

@ -0,0 +1,27 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
Jinja2==3.1.4
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
MarkupSafe==2.1.5
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -33,7 +33,8 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install pycurl
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

View File

@ -0,0 +1,26 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
pycurl==7.45.3
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -2,4 +2,5 @@
# Helper docker container to run python bottle apps
FROM python:3
RUN python -m pip install bottle
COPY requirements.txt /
RUN python -m pip install --no-cache-dir -r requirements.txt

View File

@ -0,0 +1,6 @@
bottle==0.12.25
packaging==24.1
pip==23.2.1
pipdeptree==2.23.0
setuptools==69.0.3
wheel==0.42.0

View File

@ -26,7 +26,6 @@ RUN apt-get update \
libicu-dev \
bsdutils \
curl \
python3-pika \
liblua5.1-dev \
luajit \
libssl-dev \
@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
# kazoo 2.10.0 is broken
# https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html
RUN python3 -m pip install --no-cache-dir \
PyMySQL==1.1.0 \
asyncio==3.4.3 \
avro==1.10.2 \
azure-storage-blob==12.19.0 \
boto3==1.34.24 \
cassandra-driver==3.29.0 \
confluent-kafka==2.3.0 \
delta-spark==2.3.0 \
dict2xml==1.7.4 \
dicttoxml==1.7.16 \
docker==6.1.3 \
docker-compose==1.29.2 \
grpcio==1.60.0 \
grpcio-tools==1.60.0 \
kafka-python==2.0.2 \
lz4==4.3.3 \
minio==7.2.3 \
nats-py==2.6.0 \
protobuf==4.25.2 \
kazoo==2.9.0 \
psycopg2-binary==2.9.6 \
pyhdfs==0.3.1 \
pymongo==3.11.0 \
pyspark==3.3.2 \
pytest==7.4.4 \
pytest-order==1.0.0 \
pytest-random==0.2 \
pytest-repeat==0.9.3 \
pytest-timeout==2.2.0 \
pytest-xdist==3.5.0 \
pytest-reportlog==0.4.0 \
pytz==2023.3.post1 \
pyyaml==5.3.1 \
redis==5.0.1 \
requests-kerberos==0.14.0 \
tzlocal==2.1 \
retry==0.9.2 \
bs4==0.0.2 \
lxml==5.1.0 \
urllib3==2.0.7 \
jwcrypto==1.5.6
# bs4, lxml are for cloud tests, do not delete
COPY requirements.txt /
RUN python3 -m pip install --no-cache-dir -r requirements.txt
# Hudi supports only spark 3.3.*, not 3.4
RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \

View File

@ -0,0 +1,113 @@
PyHDFS==0.3.1
PyJWT==2.3.0
PyMySQL==1.1.0
PyNaCl==1.5.0
PyYAML==5.3.1
SecretStorage==3.3.1
argon2-cffi-bindings==21.2.0
argon2-cffi==23.1.0
async-timeout==4.0.3
asyncio==3.4.3
attrs==23.2.0
avro==1.10.2
azure-core==1.30.1
azure-storage-blob==12.19.0
bcrypt==4.1.3
beautifulsoup4==4.12.3
blinker==1.4
boto3==1.34.24
botocore==1.34.101
bs4==0.0.2
cassandra-driver==3.29.0
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
confluent-kafka==2.3.0
cryptography==3.4.8
dbus-python==1.2.18
decorator==5.1.1
delta-spark==2.3.0
dict2xml==1.7.4
dicttoxml==1.7.16
distro-info==1.1+ubuntu0.2
distro==1.7.0
docker-compose==1.29.2
docker==6.1.3
dockerpty==0.4.1
docopt==0.6.2
exceptiongroup==1.2.1
execnet==2.1.1
geomet==0.2.1.post1
grpcio-tools==1.60.0
grpcio==1.60.0
gssapi==1.8.3
httplib2==0.20.2
idna==3.7
importlib-metadata==4.6.4
iniconfig==2.0.0
isodate==0.6.1
jeepney==0.7.1
jmespath==1.0.1
jsonschema==3.2.0
jwcrypto==1.5.6
kafka-python==2.0.2
kazoo==2.9.0
keyring==23.5.0
krb5==0.5.1
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
lxml==5.1.0
lz4==4.3.3
minio==7.2.3
more-itertools==8.10.0
nats-py==2.6.0
oauthlib==3.2.0
packaging==24.0
paramiko==3.4.0
pika==1.2.0
pip==24.1.1
pipdeptree==2.23.0
pluggy==1.5.0
protobuf==4.25.2
psycopg2-binary==2.9.6
py4j==0.10.9.5
py==1.11.0
pycparser==2.22
pycryptodome==3.20.0
pymongo==3.11.0
pyparsing==2.4.7
pyrsistent==0.20.0
pyspark==3.3.2
pyspnego==0.10.2
pytest-order==1.0.0
pytest-random==0.2
pytest-repeat==0.9.3
pytest-reportlog==0.4.0
pytest-timeout==2.2.0
pytest-xdist==3.5.0
pytest==7.4.4
python-apt==2.4.0+ubuntu3
python-dateutil==2.9.0.post0
python-dotenv==0.21.1
pytz==2023.3.post1
redis==5.0.1
requests-kerberos==0.14.0
requests==2.31.0
retry==0.9.2
s3transfer==0.10.1
setuptools==59.6.0
simplejson==3.19.2
six==1.16.0
soupsieve==2.5
texttable==1.7.0
tomli==2.0.1
typing_extensions==4.11.0
tzlocal==2.1
unattended-upgrades==0.1
urllib3==2.0.7
wadllib==1.3.6
websocket-client==0.59.0
wheel==0.37.1
zipp==1.0.0

View File

@ -1,3 +1,4 @@
# docker build -t clickhouse/libfuzzer .
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG
@ -29,7 +30,8 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install Jinja2
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
COPY * /

View File

@ -0,0 +1,27 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
Jinja2==3.1.4
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
MarkupSafe==2.1.5
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -23,7 +23,6 @@ RUN apt-get update \
python3 \
python3-dev \
python3-pip \
python3-setuptools \
rsync \
tree \
tzdata \
@ -33,12 +32,14 @@ RUN apt-get update \
cargo \
ripgrep \
zstd \
&& pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \
&& apt-get purge --yes python3-dev g++ \
&& apt-get autoremove --yes \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
COPY requirements.txt /
RUN pip3 --no-cache-dir install -r requirements.txt
COPY run.sh /
CMD ["bash", "/run.sh"]

View File

@ -0,0 +1,32 @@
blinker==1.4
clickhouse-driver==0.2.7
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
numpy==1.26.3
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
Pygments==2.11.2
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
pytz==2023.4
PyYAML==6.0.1
scipy==1.12.0
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
tzlocal==2.1
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -18,11 +18,8 @@ RUN apt-get update --yes \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install \
numpy \
pyodbc \
deepdiff \
sqlglot
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz"

View File

@ -0,0 +1,30 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
deepdiff==7.0.1
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
numpy==1.26.4
oauthlib==3.2.0
ordered-set==4.1.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyodbc==5.1.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
sqlglot==23.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -14,9 +14,8 @@ RUN apt-get update --yes \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
RUN pip3 install \
pyyaml \
clickhouse-driver
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
ARG sqltest_repo="https://github.com/elliotchance/sqltest/"

View File

@ -0,0 +1,29 @@
blinker==1.4
clickhouse-driver==0.2.7
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
pytz==2024.1
PyYAML==6.0.1
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
tzlocal==5.2
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG
RUN apt-get update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get install --yes --no-install-recommends \
python3-requests \
nodejs \
npm \
&& apt-get clean \

View File

@ -25,10 +25,7 @@ RUN apt-get update -y \
openssl \
postgresql-client \
python3 \
python3-lxml \
python3-pip \
python3-requests \
python3-termcolor \
qemu-user-static \
sqlite3 \
sudo \
@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR
&& unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \
&& rm protoc-${PROTOC_VERSION}-linux-x86_64.zip
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& cd /tmp/clickhouse-odbc-tmp \

View File

@ -0,0 +1,51 @@
awscli==1.22.34
blinker==1.4
botocore==1.23.34
certifi==2020.6.20
chardet==4.0.0
colorama==0.4.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
docutils==0.17.1
gyp==0.1
httplib2==0.20.2
idna==3.3
importlib-metadata==4.6.4
jeepney==0.7.1
Jinja2==3.1.3
jmespath==0.10.0
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
lxml==4.8.0
MarkupSafe==2.1.5
more-itertools==8.10.0
numpy==1.26.3
oauthlib==3.2.0
packaging==24.1
pandas==1.5.3
pip==24.1.1
pipdeptree==2.23.0
pyarrow==15.0.0
pyasn1==0.4.8
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
python-dateutil==2.8.1
pytz==2024.1
PyYAML==6.0.1
requests==2.32.3
roman==3.3
rsa==4.8
s3transfer==0.5.0
scipy==1.12.0
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
termcolor==1.1.0
urllib3==1.26.5
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0

View File

@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# python-magic is the same version as in Ubuntu 22.04
RUN pip3 install \
PyGithub \
black==23.12.0 \
boto3 \
codespell==2.2.1 \
mypy==1.8.0 \
pylint==3.1.0 \
python-magic==0.4.24 \
flake8==4.0.1 \
requests \
thefuzz \
tqdm==4.66.4 \
types-requests \
unidiff \
jwt \
&& rm -rf /root/.cache/pip
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r requirements.txt
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8

View File

@ -0,0 +1,58 @@
aiohttp==3.9.5
aiosignal==1.3.1
astroid==3.1.0
async-timeout==4.0.3
attrs==23.2.0
black==23.12.0
boto3==1.34.131
botocore==1.34.131
certifi==2024.6.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
codespell==2.2.1
cryptography==42.0.8
Deprecated==1.2.14
dill==0.3.8
flake8==4.0.1
frozenlist==1.4.1
idna==3.7
isort==5.13.2
jmespath==1.0.1
jwt==1.3.1
mccabe==0.6.1
multidict==6.0.5
mypy==1.8.0
mypy-extensions==1.0.0
packaging==24.1
pathspec==0.9.0
pip==24.1.1
pipdeptree==2.23.0
platformdirs==4.2.2
pycodestyle==2.8.0
pycparser==2.22
pyflakes==2.4.0
PyGithub==2.3.0
PyJWT==2.8.0
pylint==3.1.0
PyNaCl==1.5.0
python-dateutil==2.9.0.post0
python-magic==0.4.24
PyYAML==6.0.1
rapidfuzz==3.9.3
requests==2.32.3
s3transfer==0.10.1
setuptools==59.6.0
six==1.16.0
thefuzz==0.22.1
tomli==2.0.1
tomlkit==0.12.5
tqdm==4.66.4
types-requests==2.32.0.20240622
typing_extensions==4.12.2
unidiff==0.7.5
urllib3==2.2.2
wheel==0.37.1
wrapt==1.16.0
yamllint==1.26.3
yarl==1.9.4

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable (dcced7c8478)
#### New Feature
* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Improvement
* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, the `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)).

View File

@ -974,6 +974,13 @@ Default value: false
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization}
Enables compact mode for binary serialization of discriminators in the Variant data type.
This mode uses significantly less memory to store discriminators in parts when there is mostly one variant or many NULL values.
Default value: true
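As a hedged illustration of where this table-level setting is applied (not part of the original docs), the sketch below creates a MergeTree table with a `Variant` column and turns the compact serialization off for comparison. It assumes a local server, the `clickhouse-driver` package pinned elsewhere in this diff, and that the experimental `Variant` type is enabled; the table name is made up.
```python
# Sketch only: the host, table name and experimental-feature flag are assumptions.
from clickhouse_driver import Client

client = Client("localhost")  # assumes a ClickHouse server on the default native port

client.execute("DROP TABLE IF EXISTS variant_discr_demo")
client.execute(
    """
    CREATE TABLE variant_discr_demo (v Variant(UInt64, String))
    ENGINE = MergeTree
    ORDER BY tuple()
    SETTINGS use_compact_variant_discriminators_serialization = 0
    """,
    settings={"allow_experimental_variant_type": 1},
)

# The server-wide default of this MergeTree setting can be inspected via system tables.
print(client.execute(
    "SELECT value FROM system.merge_tree_settings "
    "WHERE name = 'use_compact_variant_discriminators_serialization'"
))
```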
## merge_workload
Used to regulate how resources are utilized and shared between merges and other workloads. The specified value is used as the `workload` setting value for background merges of this table. If not specified (empty string), the server setting `merge_workload` is used instead.

View File

@ -0,0 +1,37 @@
---
slug: /en/sql-reference/aggregate-functions/reference/aggthrow
sidebar_position: 101
---
# aggThrow
This function can be used to test exception safety. It throws an exception on creation with the specified probability.
**Syntax**
```sql
aggThrow(throw_prob)
```
**Arguments**
- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md).
**Returned value**
- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`.
**Example**
Query:
```sql
SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even;
```
Result:
```response
Received exception:
Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW)
```

View File

@ -43,6 +43,7 @@ Standard aggregate functions:
ClickHouse-specific aggregate functions:
- [aggThrow](../reference/aggthrow.md)
- [analysisOfVariance](../reference/analysis_of_variance.md)
- [any](../reference/any_respect_nulls.md)
- [anyHeavy](../reference/anyheavy.md)

View File

@ -711,7 +711,13 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c
ColumnPtr ColumnTuple::compress() const
{
if (columns.empty())
return Ptr();
{
return ColumnCompressed::create(size(), 0,
[n = column_length]
{
return ColumnTuple::create(n);
});
}
size_t byte_size = 0;
Columns compressed;

View File

@ -256,6 +256,8 @@ public:
bool position_independent_encoding = true;
bool use_compact_variant_discriminators_serialization = false;
enum class DynamicStatisticsMode
{
NONE, /// Don't write statistics.

View File

@ -30,12 +30,18 @@ namespace ErrorCodes
struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState
{
std::vector<ISerialization::SerializeBinaryBulkStatePtr> states;
explicit SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode)
{
}
SerializationVariant::DiscriminatorsSerializationMode discriminators_mode;
std::vector<ISerialization::SerializeBinaryBulkStatePtr> variant_states;
};
struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState
{
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> states;
ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> variant_states;
};
void SerializationVariant::enumerateStreams(
@ -71,7 +77,7 @@ void SerializationVariant::enumerateStreams(
.withType(type_variant ? type_variant->getVariant(i) : nullptr)
.withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr)
.withSerializationInfo(data.serialization_info)
.withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr);
.withDeserializeState(variant_deserialize_state ? variant_deserialize_state->variant_states[i] : nullptr);
addVariantElementToPath(settings.path, i);
settings.path.back().data = variant_data;
@ -87,17 +93,26 @@ void SerializationVariant::serializeBinaryBulkStatePrefix(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
settings.path.push_back(Substream::VariantDiscriminators);
auto * discriminators_stream = settings.getter(settings.path);
settings.path.pop_back();
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>();
variant_state->states.resize(variants.size());
if (!discriminators_stream)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkStatePrefix");
UInt64 mode = settings.use_compact_variant_discriminators_serialization ? DiscriminatorsSerializationMode::COMPACT : DiscriminatorsSerializationMode::BASIC;
writeBinaryLittleEndian(mode, *discriminators_stream);
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>(mode);
variant_state->variant_states.resize(variants.size());
settings.path.push_back(Substream::VariantElements);
for (size_t i = 0; i < variants.size(); ++i)
{
addVariantElementToPath(settings.path, i);
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]);
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]);
settings.path.pop_back();
}
@ -116,7 +131,7 @@ void SerializationVariant::serializeBinaryBulkStateSuffix(
for (size_t i = 0; i < variants.size(); ++i)
{
addVariantElementToPath(settings.path, i);
variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->states[i]);
variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]);
settings.path.pop_back();
}
settings.path.pop_back();
@ -128,14 +143,19 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix(
DeserializeBinaryBulkStatePtr & state,
SubstreamsDeserializeStatesCache * cache) const
{
DeserializeBinaryBulkStatePtr discriminators_state = deserializeDiscriminatorsStatePrefix(settings, cache);
if (!discriminators_state)
return;
auto variant_state = std::make_shared<DeserializeBinaryBulkStateVariant>();
variant_state->states.resize(variants.size());
variant_state->discriminators_state = discriminators_state;
variant_state->variant_states.resize(variants.size());
settings.path.push_back(Substream::VariantElements);
for (size_t i = 0; i < variants.size(); ++i)
{
addVariantElementToPath(settings.path, i);
variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache);
variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache);
settings.path.pop_back();
}
@ -143,6 +163,29 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix(
state = std::move(variant_state);
}
ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix(
DeserializeBinaryBulkSettings & settings,
SubstreamsDeserializeStatesCache * cache)
{
settings.path.push_back(Substream::VariantDiscriminators);
DeserializeBinaryBulkStatePtr discriminators_state = nullptr;
if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path))
{
discriminators_state = cached_state;
}
else if (auto * discriminators_stream = settings.getter(settings.path))
{
UInt64 mode;
readBinaryLittleEndian(mode, *discriminators_stream);
discriminators_state = std::make_shared<DeserializeBinaryBulkStateVariantDiscriminators>(mode);
addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state);
}
settings.path.pop_back();
return discriminators_state;
}
void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(
const IColumn & column,
@ -165,13 +208,71 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
auto * variant_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state);
/// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate
/// Don't write anything if column is empty.
if (limit == 0)
return;
/// Write number of rows in this granule in compact mode.
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
writeVarUInt(UInt64(limit), *discriminators_stream);
/// If the column has only one non-empty variant and no NULLs, we don't need to
/// calculate limits for the variants and can use the provided offset/limit.
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
{
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
/// In compact mode write the format of the granule and single non-empty discriminator.
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
{
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
}
/// For basic mode just serialize this discriminator limit times.
else
{
for (size_t i = 0; i < limit; ++i)
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
}
settings.path.push_back(Substream::VariantElements);
addVariantElementToPath(settings.path, non_empty_global_discr);
/// We can use the same offset/limit as for whole Variant column
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]);
variants_statistics[variant_names[non_empty_global_discr]] += limit;
settings.path.pop_back();
settings.path.pop_back();
return;
}
/// If column has only NULLs, just serialize NULL discriminators.
else if (col.hasOnlyNulls())
{
/// In compact mode write single NULL_DISCRIMINATOR.
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
{
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
}
/// In basic mode write NULL_DISCRIMINATOR limit times.
else
{
for (size_t i = 0; i < limit; ++i)
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
}
return;
}
/// If offset == 0 and limit == col.size(), we don't need to calculate
/// offsets and limits for the variants and can just serialize the whole columns.
if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls())
if ((offset == 0 && limit == col.size()))
{
/// First, serialize discriminators.
/// If we have only NULLs or local and global discriminators are the same, just serialize the column as is.
if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder())
/// Here we are sure that column contains different discriminators, use plain granule format in compact mode.
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream);
/// If local and global discriminators are the same, just serialize the column as is.
if (col.hasGlobalVariantsOrder())
{
SerializationNumber<ColumnVariant::Discriminator>().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit);
}
@ -188,7 +289,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
for (size_t i = 0; i != variants.size(); ++i)
{
addVariantElementToPath(settings.path, i);
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]);
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]);
variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size();
settings.path.pop_back();
}
@ -196,36 +297,16 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
return;
}
/// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant.
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
{
/// First, serialize discriminators.
/// We know that all discriminators are the same, so we just need to serialize this discriminator limit times.
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
for (size_t i = 0; i != limit; ++i)
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
/// Second, serialize non-empty variant (other variants are empty and we can skip their serialization).
settings.path.push_back(Substream::VariantElements);
addVariantElementToPath(settings.path, non_empty_global_discr);
/// We can use the same offset/limit as for whole Variant column
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]);
variants_statistics[variant_names[non_empty_global_discr]] += limit;
settings.path.pop_back();
settings.path.pop_back();
return;
}
/// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant.
const auto & local_discriminators = col.getLocalDiscriminators();
const auto & offsets = col.getOffsets();
std::vector<std::pair<size_t, size_t>> variant_offsets_and_limits(variants.size(), {0, 0});
size_t end = offset + limit;
size_t num_non_empty_variants_in_range = 0;
ColumnVariant::Discriminator last_non_empty_variant_discr = 0;
for (size_t i = offset; i < end; ++i)
{
auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]);
writeBinaryLittleEndian(global_discr, *discriminators_stream);
if (global_discr != ColumnVariant::NULL_DISCRIMINATOR)
{
/// If we see this discriminator for the first time, update offset
@ -233,9 +314,38 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
variant_offsets_and_limits[global_discr].first = offsets[i];
/// Update limit for this discriminator.
++variant_offsets_and_limits[global_discr].second;
++num_non_empty_variants_in_range;
last_non_empty_variant_discr = global_discr;
}
}
/// In basic mode just serialize discriminators as is row by row.
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC)
{
for (size_t i = offset; i < end; ++i)
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
}
/// In compact mode check if we have the same discriminator for all rows in this granule.
/// First, check if all values in granule are NULLs.
else if (num_non_empty_variants_in_range == 0)
{
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
}
/// Then, check if there is only 1 variant and no NULLs in this granule.
else if (num_non_empty_variants_in_range == 1 && variant_offsets_and_limits[last_non_empty_variant_discr].second == limit)
{
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
writeBinaryLittleEndian(last_non_empty_variant_discr, *discriminators_stream);
}
/// Otherwise there are different discriminators in this granule.
else
{
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream);
for (size_t i = offset; i < end; ++i)
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
}
/// Serialize variants in global order.
settings.path.push_back(Substream::VariantElements);
for (size_t i = 0; i != variants.size(); ++i)
@ -249,7 +359,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
variant_offsets_and_limits[i].first,
variant_offsets_and_limits[i].second,
settings,
variant_state->states[i]);
variant_state->variant_states[i]);
variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second;
settings.path.pop_back();
}
@ -284,39 +394,68 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
/// First, deserialize discriminators.
settings.path.push_back(Substream::VariantDiscriminators);
DeserializeBinaryBulkStateVariant * variant_state = nullptr;
std::vector<size_t> variant_limits;
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
{
variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
col.getLocalDiscriminatorsPtr() = cached_discriminators;
}
else
else if (auto * discriminators_stream = settings.getter(settings.path))
{
auto * discriminators_stream = settings.getter(settings.path);
if (!discriminators_stream)
return;
variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
auto * discriminators_state = checkAndGetState<DeserializeBinaryBulkStateVariantDiscriminators>(variant_state->discriminators_state);
/// Deserialize discriminators according to serialization mode.
if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC)
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
else
variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, *discriminators_state);
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr());
}
/// It may happen that there is no such stream, in this case just do nothing.
else
{
settings.path.pop_back();
return;
}
settings.path.pop_back();
/// Second, calculate limits for each variant by iterating through new discriminators.
std::vector<size_t> variant_limits(variants.size(), 0);
auto & discriminators_data = col.getLocalDiscriminators();
size_t discriminators_offset = discriminators_data.size() - limit;
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
/// Second, calculate limits for each variant by iterating through new discriminators
/// if we didn't do it during discriminators deserialization.
if (variant_limits.empty())
{
ColumnVariant::Discriminator discr = discriminators_data[i];
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
++variant_limits[discr];
variant_limits.resize(variants.size(), 0);
auto & discriminators_data = col.getLocalDiscriminators();
/// We can actually read fewer than `limit` discriminators, and we cannot determine the actual number of read rows
/// from the discriminators column, as it could be taken from the substreams cache. We need the actual number of read
/// rows to fill offsets correctly later if they are not in the cache. We can determine whether the offsets column is in
/// the cache by comparing its size with the discriminators column size (they are the same when offsets are in the cache).
/// If the offsets are not in the cache, we can use their size to determine the actual number of read rows.
size_t num_new_discriminators = limit;
size_t offsets_size = col.getOffsetsPtr()->size();
if (discriminators_data.size() > offsets_size)
num_new_discriminators = discriminators_data.size() - offsets_size;
size_t discriminators_offset = discriminators_data.size() - num_new_discriminators;
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
{
ColumnVariant::Discriminator discr = discriminators_data[i];
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
++variant_limits[discr];
}
}
/// Now we can deserialize variants according to their limits.
auto * variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
settings.path.push_back(Substream::VariantElements);
for (size_t i = 0; i != variants.size(); ++i)
{
addVariantElementToPath(settings.path, i);
variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache);
variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->variant_states[i], cache);
settings.path.pop_back();
}
settings.path.pop_back();
@ -336,20 +475,49 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
}
else
{
auto & offsets = col.getOffsets();
offsets.reserve(offsets.size() + limit);
std::vector<size_t> variant_offsets;
variant_offsets.reserve(variants.size());
size_t num_non_empty_variants = 0;
ColumnVariant::Discriminator last_non_empty_discr = 0;
for (size_t i = 0; i != variants.size(); ++i)
variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
{
ColumnVariant::Discriminator discr = discriminators_data[i];
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
offsets.emplace_back();
else
offsets.push_back(variant_offsets[discr]++);
if (variant_limits[i])
{
++num_non_empty_variants;
last_non_empty_discr = i;
}
variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);
}
auto & discriminators_data = col.getLocalDiscriminators();
auto & offsets = col.getOffsets();
size_t num_new_offsets = discriminators_data.size() - offsets.size();
offsets.reserve(offsets.size() + num_new_offsets);
/// If only NULLs were read, fill offsets with 0.
if (num_non_empty_variants == 0)
{
offsets.resize_fill(discriminators_data.size(), 0);
}
/// If only one variant and no NULLs were read, fill offsets with sequential offsets of this variant.
else if (num_non_empty_variants == 1 && variant_limits[last_non_empty_discr] == num_new_offsets)
{
size_t first_offset = col.getVariantByLocalDiscriminator(last_non_empty_discr).size() - num_new_offsets;
for (size_t i = 0; i != num_new_offsets; ++i)
offsets.push_back(first_offset + i);
}
/// Otherwise iterate through discriminators and fill offsets accordingly.
else
{
size_t start = offsets.size();
for (size_t i = start; i != discriminators_data.size(); ++i)
{
ColumnVariant::Discriminator discr = discriminators_data[i];
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
offsets.emplace_back();
else
offsets.push_back(variant_offsets[discr]++);
}
}
addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr());
@ -357,6 +525,72 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
settings.path.pop_back();
}
std::vector<size_t> SerializationVariant::deserializeCompactDiscriminators(
DB::ColumnPtr & discriminators_column,
size_t limit,
ReadBuffer * stream,
bool continuous_reading,
DeserializeBinaryBulkStateVariantDiscriminators & state) const
{
auto & discriminators = assert_cast<ColumnVariant::ColumnDiscriminators &>(*discriminators_column->assumeMutable());
auto & discriminators_data = discriminators.getData();
/// Reset state if we are reading from the start of the granule and not from the previous position in the file.
if (!continuous_reading)
state.remaining_rows_in_granule = 0;
/// Calculate limits for variants during discriminators deserialization.
std::vector<size_t> variant_limits(variants.size(), 0);
while (limit)
{
/// If we read all rows from current granule, start reading the next one.
if (state.remaining_rows_in_granule == 0)
{
if (stream->eof())
return variant_limits;
readDiscriminatorsGranuleStart(state, stream);
}
size_t limit_in_granule = std::min(limit, state.remaining_rows_in_granule);
if (state.granule_format == CompactDiscriminatorsGranuleFormat::COMPACT)
{
auto & data = discriminators.getData();
data.resize_fill(data.size() + limit_in_granule, state.compact_discr);
if (state.compact_discr != ColumnVariant::NULL_DISCRIMINATOR)
variant_limits[state.compact_discr] += limit_in_granule;
}
else
{
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0);
size_t start = discriminators_data.size() - limit_in_granule;
for (size_t i = start; i != discriminators_data.size(); ++i)
{
ColumnVariant::Discriminator discr = discriminators_data[i];
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
++variant_limits[discr];
}
}
state.remaining_rows_in_granule -= limit_in_granule;
limit -= limit_in_granule;
}
return variant_limits;
}
void SerializationVariant::readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, DB::ReadBuffer * stream)
{
UInt64 granule_size;
readVarUInt(granule_size, *stream);
state.remaining_rows_in_granule = granule_size;
UInt8 granule_format;
readBinaryLittleEndian(granule_format, *stream);
state.granule_format = static_cast<CompactDiscriminatorsGranuleFormat>(granule_format);
if (granule_format == CompactDiscriminatorsGranuleFormat::COMPACT)
readBinaryLittleEndian(state.compact_discr, *stream);
}
void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const
{
path.push_back(Substream::VariantElement);

View File

@ -6,6 +6,13 @@
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
}
/// Class for serializing/deserializing column with Variant type.
/// It supports both text and binary bulk serializations/deserializations.
///
@ -18,6 +25,17 @@ namespace DB
///
/// During binary bulk serialization it transforms local discriminators
/// to global and serializes them into a separate stream VariantDiscriminators.
/// There are 2 modes of serializing discriminators:
/// Basic mode, when all discriminators are serialized as is, row by row.
/// Compact mode, when we avoid writing the same discriminators in a granule when there is
/// only one variant (or only NULLs) in the granule.
/// In compact mode we serialize granules in the following format:
/// <number of rows in granule><granule format><granule data>
/// There are 2 different granule formats - plain and compact.
/// The plain format is used when the granule contains different discriminators;
/// in this format all discriminators are serialized as is, row by row.
/// The compact format is used when all discriminators in the granule are the same;
/// in this case only this single discriminator is serialized.
/// Each variant is serialized into a separate stream with path VariantElements/VariantElement
/// (VariantElements stream is needed for correct sub-columns creation). We store and serialize
/// variants in a sparse form (the size of a variant column equals to the number of its discriminator
@ -32,6 +50,25 @@ namespace DB
class SerializationVariant : public ISerialization
{
public:
struct DiscriminatorsSerializationMode
{
enum Value
{
BASIC = 0, /// Store the whole discriminators column.
COMPACT = 1, /// Don't write discriminators in granule if all of them are the same.
};
static void checkMode(UInt64 mode)
{
if (mode > Value::COMPACT)
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationVariant discriminators column.");
}
explicit DiscriminatorsSerializationMode(UInt64 mode) : value(static_cast<Value>(mode)) { checkMode(mode); }
Value value;
};
using VariantSerializations = std::vector<SerializationPtr>;
explicit SerializationVariant(
@ -123,8 +160,43 @@ public:
static std::vector<size_t> getVariantsDeserializeTextOrder(const DataTypes & variant_types);
private:
friend SerializationVariantElement;
void addVariantElementToPath(SubstreamPath & path, size_t i) const;
enum CompactDiscriminatorsGranuleFormat
{
PLAIN = 0, /// Granule has different discriminators and they are serialized as is row by row.
COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value.
};
struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState
{
explicit DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_)
{
}
DiscriminatorsSerializationMode mode;
/// Deserialize state of currently read granule in compact mode.
CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN;
size_t remaining_rows_in_granule = 0;
ColumnVariant::Discriminator compact_discr = 0;
};
static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix(
DeserializeBinaryBulkSettings & settings,
SubstreamsDeserializeStatesCache * cache);
std::vector<size_t> deserializeCompactDiscriminators(
ColumnPtr & discriminators_column,
size_t limit,
ReadBuffer * stream,
bool continuous_reading,
DeserializeBinaryBulkStateVariantDiscriminators & state) const;
static void readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, ReadBuffer * stream);
bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
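To make the compact discriminators layout described in the header comments above concrete, here is a small decoding sketch (not part of the commit). It assumes the row count is written as an unsigned LEB128-style varint (`writeVarUInt`), the granule format as one byte, discriminators as single `UInt8` values, and `NULL_DISCRIMINATOR == 255`, mirroring `readDiscriminatorsGranuleStart` and `deserializeCompactDiscriminators` elsewhere in this diff.
```python
import io

PLAIN, COMPACT = 0, 1  # CompactDiscriminatorsGranuleFormat values declared above


def read_var_uint(buf: io.BytesIO) -> int:
    """Unsigned LEB128-style varint, as assumed to be written by writeVarUInt."""
    result, shift = 0, 0
    while True:
        byte = buf.read(1)[0]
        result |= (byte & 0x7F) << shift
        if byte < 0x80:
            return result
        shift += 7


def read_discriminators_granule(buf: io.BytesIO) -> list[int]:
    """Decode one granule: <number of rows><granule format><granule data>."""
    rows = read_var_uint(buf)
    fmt = buf.read(1)[0]
    if fmt == COMPACT:
        # One discriminator shared by every row of the granule (255 is assumed to be NULL_DISCRIMINATOR).
        return [buf.read(1)[0]] * rows
    # PLAIN: one discriminator byte per row, exactly as in the basic mode.
    return list(buf.read(rows))


# A granule of 4 rows that all hold variant 1, followed by a mixed granule of 3 rows.
stream = io.BytesIO(bytes([4, COMPACT, 1]) + bytes([3, PLAIN, 0, 255, 1]))
print(read_discriminators_granule(stream))  # [1, 1, 1, 1]
print(read_discriminators_granule(stream))  # [0, 255, 1]
```
Under these assumptions, a granule of 8192 rows that all hold the same variant shrinks from 8192 discriminator bytes to a handful of bytes, which is where the savings of the compact mode come from.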

View File

@ -1,5 +1,6 @@
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/Serializations/SerializationVariant.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnNullable.h>
#include <IO/ReadHelpers.h>
@ -12,7 +13,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
{
/// During deserialization discriminators and variant streams can be shared.
/// For example we can read several variant elements together: "select v.UInt32, v.String from table",
@ -24,7 +25,7 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria
/// substream cache correctly.
ColumnPtr discriminators;
ColumnPtr variant;
ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
};
@ -65,7 +66,12 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary
void SerializationVariantElement::deserializeBinaryBulkStatePrefix(
DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const
{
DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache);
if (!discriminators_state)
return;
auto variant_element_state = std::make_shared<DeserializeBinaryBulkStateVariantElement>();
variant_element_state->discriminators_state = discriminators_state;
addVariantToPath(settings.path);
nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache);
@ -86,35 +92,54 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const
{
auto * variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
/// First, deserialize discriminators from Variant column.
settings.path.push_back(Substream::VariantDiscriminators);
DeserializeBinaryBulkStateVariantElement * variant_element_state = nullptr;
std::optional<size_t> variant_limit;
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
{
variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
variant_element_state->discriminators = cached_discriminators;
}
else
else if (auto * discriminators_stream = settings.getter(settings.path))
{
auto * discriminators_stream = settings.getter(settings.path);
if (!discriminators_stream)
return;
variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(variant_element_state->discriminators_state);
/// If we started to read a new column, reinitialize discriminators column in deserialization state.
if (!variant_element_state->discriminators || result_column->empty())
variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create();
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
/// Deserialize discriminators according to serialization mode.
if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC)
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
else
variant_limit = deserializeCompactDiscriminators(variant_element_state->discriminators, limit, discriminators_stream, settings.continuous_reading, *variant_element_state);
addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators);
}
else
{
settings.path.pop_back();
return;
}
settings.path.pop_back();
/// Iterate through new discriminators to calculate the limit for our variant.
/// We could read fewer than `limit` discriminators, but we will need the actual number of read rows later.
size_t num_new_discriminators = variant_element_state->discriminators->size() - result_column->size();
/// Iterate through new discriminators to calculate the limit for our variant
/// if we didn't do it during discriminators deserialization.
const auto & discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_state->discriminators).getData();
size_t discriminators_offset = variant_element_state->discriminators->size() - limit;
size_t variant_limit = 0;
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
variant_limit += (discriminators_data[i] == variant_discriminator);
size_t discriminators_offset = variant_element_state->discriminators->size() - num_new_discriminators;
if (!variant_limit)
{
variant_limit = 0;
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
*variant_limit += (discriminators_data[i] == variant_discriminator);
}
/// Now we know the limit for our variant and can deserialize it.
@ -125,19 +150,19 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
auto & nullable_column = assert_cast<ColumnNullable &>(*mutable_column);
NullMap & null_map = nullable_column.getNullMapData();
/// If we have only our discriminator in range, fill null map with 0.
if (variant_limit == limit)
if (variant_limit == num_new_discriminators)
{
null_map.resize_fill(null_map.size() + limit, 0);
null_map.resize_fill(null_map.size() + num_new_discriminators, 0);
}
/// If our discriminator does not occur in the current range, fill null map with 1.
else if (variant_limit == 0)
{
null_map.resize_fill(null_map.size() + limit, 1);
null_map.resize_fill(null_map.size() + num_new_discriminators, 1);
}
/// Otherwise we should iterate through discriminators to fill null map.
else
{
null_map.reserve(null_map.size() + limit);
null_map.reserve(null_map.size() + num_new_discriminators);
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
null_map.push_back(discriminators_data[i] != variant_discriminator);
}
@ -159,12 +184,12 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
/// If nothing to deserialize, just insert defaults.
if (variant_limit == 0)
{
mutable_column->insertManyDefaults(limit);
mutable_column->insertManyDefaults(num_new_discriminators);
return;
}
addVariantToPath(settings.path);
nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache);
nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache);
removeVariantFromPath(settings.path);
/// If nothing was deserialized when variant_limit > 0
@ -173,16 +198,16 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
/// In this case we should just insert default values.
if (variant_element_state->variant->empty())
{
mutable_column->insertManyDefaults(limit);
mutable_column->insertManyDefaults(num_new_discriminators);
return;
}
size_t variant_offset = variant_element_state->variant->size() - variant_limit;
size_t variant_offset = variant_element_state->variant->size() - *variant_limit;
/// If we have only our discriminator in range, insert the whole range to result column.
if (variant_limit == limit)
if (variant_limit == num_new_discriminators)
{
mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit);
mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, *variant_limit);
}
/// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator.
else
@ -197,6 +222,57 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
}
}
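
To make the fast paths above concrete: after counting how many of the newly read discriminators belong to this variant, the element either bulk-fills the null map (every row is ours, or none is) or falls back to a per-row scan. A minimal standalone sketch of that decision over plain std::vector, with illustrative names rather than the real ColumnVariant/ColumnNullable API:

#include <cstddef>
#include <cstdint>
#include <vector>

using Discriminator = uint8_t;

/// Append null-map entries for discriminators[offset..]: 0 where the row belongs
/// to our_discr, 1 otherwise. Mirrors the three branches in the hunks above.
void fillNullMap(std::vector<uint8_t> & null_map,
                 const std::vector<Discriminator> & discriminators,
                 size_t offset,
                 Discriminator our_discr)
{
    const size_t num_new = discriminators.size() - offset;
    size_t ours = 0;
    for (size_t i = offset; i != discriminators.size(); ++i)
        ours += (discriminators[i] == our_discr);

    if (ours == num_new)          /// only our variant in the range: all zeros
    {
        null_map.insert(null_map.end(), num_new, 0);
    }
    else if (ours == 0)           /// our variant absent from the range: all ones
    {
        null_map.insert(null_map.end(), num_new, 1);
    }
    else                          /// mixed range: decide per row
    {
        for (size_t i = offset; i != discriminators.size(); ++i)
            null_map.push_back(discriminators[i] != our_discr);
    }
}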
size_t SerializationVariantElement::deserializeCompactDiscriminators(
DB::ColumnPtr & discriminators_column,
size_t limit,
DB::ReadBuffer * stream,
bool continuous_reading,
DeserializeBinaryBulkStateVariantElement & variant_element_state) const
{
auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(variant_element_state.discriminators_state);
auto & discriminators = assert_cast<ColumnVariant::ColumnDiscriminators &>(*discriminators_column->assumeMutable());
auto & discriminators_data = discriminators.getData();
/// Reset state if we are reading from the start of the granule and not from the previous position in the file.
if (!continuous_reading)
discriminators_state->remaining_rows_in_granule = 0;
/// Calculate our variant limit during discriminators deserialization.
size_t variant_limit = 0;
while (limit)
{
/// If we read all rows from current granule, start reading the next one.
if (discriminators_state->remaining_rows_in_granule == 0)
{
if (stream->eof())
return variant_limit;
SerializationVariant::readDiscriminatorsGranuleStart(*discriminators_state, stream);
}
size_t limit_in_granule = std::min(limit, discriminators_state->remaining_rows_in_granule);
if (discriminators_state->granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT)
{
auto & data = discriminators.getData();
data.resize_fill(data.size() + limit_in_granule, discriminators_state->compact_discr);
if (discriminators_state->compact_discr == variant_discriminator)
variant_limit += limit_in_granule;
}
else
{
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0);
size_t start = discriminators_data.size() - limit_in_granule;
for (size_t i = start; i != discriminators_data.size(); ++i)
variant_limit += (discriminators_data[i] == variant_discriminator);
}
discriminators_state->remaining_rows_in_granule -= limit_in_granule;
limit -= limit_in_granule;
}
return variant_limit;
}
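
The writer counterpart of this reader is not part of the hunk, but the code above implies the idea behind the compact format: a granule whose rows all share one discriminator is stored as a single value plus a format tag instead of one discriminator per row. A hedged sketch of such an encoder follows; the real granule header written by SerializationVariant is not shown here, so the tag values and byte layout below are illustrative only.

#include <cstddef>
#include <cstdint>
#include <vector>

using Discriminator = uint8_t;

enum class GranuleFormat : uint8_t { PLAIN = 0, COMPACT = 1 };  /// assumed tag values

/// Encode one granule of discriminators: [COMPACT, value] when every row carries
/// the same discriminator, otherwise [PLAIN, one byte per row]. A sketch of the
/// saving behind use_compact_variant_discriminators_serialization, not the
/// actual on-disk format.
void encodeGranule(const std::vector<Discriminator> & rows, std::vector<uint8_t> & out)
{
    bool all_same = !rows.empty();
    for (size_t i = 1; i < rows.size(); ++i)
        all_same = all_same && (rows[i] == rows[0]);

    if (all_same)
    {
        out.push_back(static_cast<uint8_t>(GranuleFormat::COMPACT));
        out.push_back(rows[0]);   /// one byte stands in for the whole granule
    }
    else
    {
        out.push_back(static_cast<uint8_t>(GranuleFormat::PLAIN));
        out.insert(out.end(), rows.begin(), rows.end());
    }
}

Assuming one-byte discriminators as in the sketch, a granule dominated by a single variant shrinks from one byte per row to two bytes total, which is presumably why the new setting defaults to true.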
void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const
{
path.push_back(Substream::VariantElements);

View File

@ -80,6 +80,15 @@ public:
private:
friend SerializationVariant;
struct DeserializeBinaryBulkStateVariantElement;
size_t deserializeCompactDiscriminators(
ColumnPtr & discriminators_column,
size_t limit,
ReadBuffer * stream,
bool continuous_reading,
DeserializeBinaryBulkStateVariantElement & variant_element_state) const;
void addVariantToPath(SubstreamPath & path) const;
void removeVariantFromPath(SubstreamPath & path) const;
};

View File

@ -154,7 +154,8 @@ void writeColumnSingleGranule(
const SerializationPtr & serialization,
ISerialization::OutputStreamGetter stream_getter,
size_t from_row,
size_t number_of_rows)
size_t number_of_rows,
const MergeTreeWriterSettings & settings)
{
ISerialization::SerializeBinaryBulkStatePtr state;
ISerialization::SerializeBinaryBulkSettings serialize_settings;
@ -162,6 +163,7 @@ void writeColumnSingleGranule(
serialize_settings.getter = stream_getter;
serialize_settings.position_independent_encoding = true;
serialize_settings.low_cardinality_max_dictionary_size = 0;
serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization;
serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX;
serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state);
@ -259,7 +261,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G
writeColumnSingleGranule(
block.getByName(name_and_type->name), getSerialization(name_and_type->name),
stream_getter, granule.start_row, granule.rows_to_write);
stream_getter, granule.start_row, granule.rows_to_write, settings);
/// Each type always have at least one substream
prev_stream->hashing_buf.next();

View File

@ -433,6 +433,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
if (inserted)
{
ISerialization::SerializeBinaryBulkSettings serialize_settings;
serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization;
serialize_settings.getter = createStreamGetter(name_and_type, offset_columns);
serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second);
}
@ -441,6 +442,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
serialize_settings.getter = createStreamGetter(name_and_type, offset_columns);
serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;
serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;
serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization;
for (const auto & granule : granules)
{
@ -630,6 +632,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums &
ISerialization::SerializeBinaryBulkSettings serialize_settings;
serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;
serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;
serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization;
WrittenOffsetColumns offset_columns;
if (rows_written_in_last_mark > 0)
{

View File

@ -76,6 +76,7 @@ struct MergeTreeWriterSettings
, max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation)
, low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size)
, low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0)
, use_compact_variant_discriminators_serialization(storage_settings->use_compact_variant_discriminators_serialization)
{
}
@ -98,6 +99,7 @@ struct MergeTreeWriterSettings
size_t low_cardinality_max_dictionary_size;
bool low_cardinality_use_single_dictionary_for_part;
bool use_compact_variant_discriminators_serialization;
};
}

View File

@ -43,6 +43,7 @@ struct Settings;
M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \
M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \
M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \
M(Bool, use_compact_variant_discriminators_serialization, true, "Use compact version of Variant discriminators serialization.", 0) \
\
/** Merge settings. */ \
M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \
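
Condensing the plumbing spread across the writer hunks above: the table-level MergeTree setting is copied into MergeTreeWriterSettings and from there into ISerialization::SerializeBinaryBulkSettings wherever a bulk write is set up. A stripped-down sketch of that chain, with each struct reduced to the one relevant field (the -Like names are placeholders, not real classes):

struct MergeTreeSettingsLike
{
    bool use_compact_variant_discriminators_serialization = true;  /// new table-level setting
};

struct MergeTreeWriterSettingsLike
{
    explicit MergeTreeWriterSettingsLike(const MergeTreeSettingsLike & storage_settings)
        : use_compact_variant_discriminators_serialization(storage_settings.use_compact_variant_discriminators_serialization)
    {
    }
    bool use_compact_variant_discriminators_serialization;
};

struct SerializeBinaryBulkSettingsLike
{
    bool use_compact_variant_discriminators_serialization = false;  /// off unless the writer enables it
};

/// What each writer does before calling serializeBinaryBulkStatePrefix.
SerializeBinaryBulkSettingsLike makeSerializeSettings(const MergeTreeWriterSettingsLike & writer_settings)
{
    SerializeBinaryBulkSettingsLike serialize_settings;
    serialize_settings.use_compact_variant_discriminators_serialization
        = writer_settings.use_compact_variant_discriminators_serialization;
    return serialize_settings;
}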

View File

@ -1,2 +1 @@
1,0
NETWORK_ERROR=0

View File

@ -19,20 +19,25 @@ create table dist_01247 as data_01247 engine=Distributed(test_cluster_two_shards
select * from dist_01247 format Null;
EOL
network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")
# NOTE: it is possible to get NETWORK_ERROR even with no-parallel, at least due to system.*_log_sender to the cloud
for ((i = 0; i < 100; ++i)); do
network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")
opts=(
"--max_distributed_connections=1"
"--optimize_skip_unused_shards=1"
"--optimize_distributed_group_by_sharding_key=1"
"--prefer_localhost_replica=0"
)
$CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm <<EOL
select count(), * from dist_01247 group by number order by number limit 1;
EOL
opts=(
"--max_distributed_connections=1"
"--optimize_skip_unused_shards=1"
"--optimize_distributed_group_by_sharding_key=1"
"--prefer_localhost_replica=0"
)
$CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm -q "select count(), * from dist_01247 group by number order by number limit 1 format Null"
# expect zero new network errors
network_errors_after=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")
# expect zero new network errors
network_errors_after=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")
if [[ $((network_errors_after-network_errors_before)) -eq 0 ]]; then
break
fi
done
echo NETWORK_ERROR=$(( network_errors_after-network_errors_before ))
$CLICKHOUSE_CLIENT -q "drop table data_01247"

View File

@ -0,0 +1,2 @@
() 0
() 0

View File

@ -0,0 +1,2 @@
SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true;
SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true;

View File

@ -1098,6 +1098,8 @@ aggregatefunction
aggregatingmergetree
aggregatio
aggretate
aggthrow
aggThrow
aiochclient
allocator
alphaTokens

View File

@ -1,3 +1,4 @@
v24.6.2.17-stable 2024-07-05
v24.6.1.4423-stable 2024-07-01
v24.5.4.49-stable 2024-07-01
v24.5.3.5-stable 2024-06-13
@ -6,6 +7,7 @@ v24.5.1.1763-stable 2024-06-01
v24.4.3.25-stable 2024-06-14
v24.4.2.141-stable 2024-06-07
v24.4.1.2088-stable 2024-05-01
v24.3.5.46-lts 2024-07-03
v24.3.4.147-lts 2024-06-13
v24.3.3.102-lts 2024-05-01
v24.3.2.23-lts 2024-04-03
