Merge branch 'hive_style_partitioning' of https://github.com/yariks5s/ClickHouse into hive_style_partitioning
commit d627cfe077
@@ -40,8 +40,6 @@ Every month we get together with the community (users, contributors, customers,

Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.

* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26
* [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27
* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9
* [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9
* [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9
@@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING)
    message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
endif ()

message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}")
message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}")
endif ()
2 contrib/orc vendored
@@ -1 +1 @@
Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f
Subproject commit bcc025c09828c556f54cfbdf83a66b9acae7d17f

2 contrib/s2geometry vendored
@@ -1 +1 @@
Subproject commit 0146e2d1355828f8f633cb050948250ad7406c57
Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43
@@ -1,7 +1,6 @@
option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES})

# ARCH_S390X broke upstream, it can be re-enabled once https://github.com/google/s2geometry/pull/372 is merged
if (NOT ENABLE_S2_GEOMETRY OR ARCH_S390X)
if (NOT ENABLE_S2_GEOMETRY)
    message(STATUS "Not using S2 Geometry")
    return()
endif()
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.6.1.4423"
ARG VERSION="24.6.2.17"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""
47 docker/reqgenerator.py Normal file
@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# To run this script you must install docker and the pipdeptree python package
#

import subprocess
import os
import sys


def build_docker_deps(image_name, imagedir):
    cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt"""
    subprocess.check_call(cmd, shell=True)


def check_docker_file_install_with_pip(filepath):
    image_name = None
    with open(filepath, "r") as f:
        for line in f:
            if "docker build" in line:
                arr = line.split(" ")
                if len(arr) > 4:
                    image_name = arr[4]
            if "pip3 install" in line or "pip install" in line:
                return image_name, True
    return image_name, False


def process_affected_images(images_dir):
    for root, _dirs, files in os.walk(images_dir):
        for f in files:
            if f == "Dockerfile":
                docker_file_path = os.path.join(root, f)
                print("Checking image on path", docker_file_path)
                image_name, has_pip = check_docker_file_install_with_pip(
                    docker_file_path
                )
                if has_pip:
                    print("Find pip in", image_name)
                    try:
                        build_docker_deps(image_name, root)
                    except Exception as ex:
                        print(ex)
                else:
                    print("Pip not found in", docker_file_path)


process_affected_images(sys.argv[1])
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.6.1.4423"
ARG VERSION="24.6.2.17"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""
@@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.6.1.4423"
ARG VERSION="24.6.2.17"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

#docker-official-library:off
@@ -19,10 +19,7 @@ RUN apt-get update \
    odbcinst \
    psmisc \
    python3 \
    python3-lxml \
    python3-pip \
    python3-requests \
    python3-termcolor \
    unixodbc \
    pv \
    jq \
@@ -31,7 +28,8 @@ RUN apt-get update \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt

# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
@@ -39,6 +37,10 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake

# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path.
# It's very dirty workaround, better to build compiler and LLVM ourself and use it. Details: https://github.com/llvm/llvm-project/issues/95792
RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu

ARG CCACHE_VERSION=4.6.1
RUN mkdir /tmp/ccache \
    && cd /tmp/ccache \
41 docker/test/fasttest/requirements.txt Normal file
@@ -0,0 +1,41 @@
Jinja2==3.1.3
MarkupSafe==2.1.5
PyJWT==2.3.0
PyYAML==6.0.1
Pygments==2.11.2
SecretStorage==3.3.1
blinker==1.4
certifi==2020.6.20
chardet==4.0.0
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
idna==3.3
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
lxml==4.8.0
more-itertools==8.10.0
numpy==1.26.3
oauthlib==3.2.0
packaging==24.1
pandas==1.5.3
pip==24.1.1
pipdeptree==2.23.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
python-dateutil==2.9.0.post0
pytz==2024.1
requests==2.32.3
scipy==1.12.0
setuptools==59.6.0
six==1.16.0
termcolor==1.1.0
urllib3==1.26.5
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0
@@ -84,6 +84,8 @@ function start_server
    echo "ClickHouse server pid '$server_pid' started and responded"
}

export -f start_server

function clone_root
{
    [ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE"
@@ -254,6 +256,19 @@ function configure
    rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
}

function timeout_with_logging() {
    local exit_code=0

    timeout -s TERM --preserve-status "${@}" || exit_code="${?}"

    if [[ "${exit_code}" -eq "124" ]]
    then
        echo "The command 'timeout ${*}' has been killed by timeout"
    fi

    return $exit_code
}

function run_tests
{
    clickhouse-server --version
@@ -292,6 +307,8 @@ function run_tests
    clickhouse stop --pid-path "$FASTTEST_DATA"
}

export -f run_tests

case "$stage" in
"")
    ls -la
@@ -315,7 +332,7 @@ case "$stage" in
    configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
    ;&
"run_tests")
    run_tests
    timeout_with_logging 35m bash -c run_tests ||:
    /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \
        --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \
        --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv"
@@ -31,7 +31,8 @@ RUN apt-get update \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

RUN pip3 install Jinja2
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt

COPY * /
27 docker/test/fuzzer/requirements.txt Normal file
@@ -0,0 +1,27 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
Jinja2==3.1.4
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
MarkupSafe==2.1.5
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0
@@ -33,7 +33,8 @@ RUN apt-get update \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

RUN pip3 install pycurl
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip

# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
26 docker/test/integration/base/requirements.txt Normal file
@@ -0,0 +1,26 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
pycurl==7.45.3
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0
@@ -2,4 +2,5 @@
# Helper docker container to run python bottle apps

FROM python:3
RUN python -m pip install bottle
COPY requirements.txt /
RUN python -m pip install --no-cache-dir -r requirements.txt
6 docker/test/integration/resolver/requirements.txt Normal file
@@ -0,0 +1,6 @@
bottle==0.12.25
packaging==24.1
pip==23.2.1
pipdeptree==2.23.0
setuptools==69.0.3
wheel==0.42.0
@@ -26,7 +26,6 @@ RUN apt-get update \
    libicu-dev \
    bsdutils \
    curl \
    python3-pika \
    liblua5.1-dev \
    luajit \
    libssl-dev \
@@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \

# kazoo 2.10.0 is broken
# https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html
RUN python3 -m pip install --no-cache-dir \
    PyMySQL==1.1.0 \
    asyncio==3.4.3 \
    avro==1.10.2 \
    azure-storage-blob==12.19.0 \
    boto3==1.34.24 \
    cassandra-driver==3.29.0 \
    confluent-kafka==2.3.0 \
    delta-spark==2.3.0 \
    dict2xml==1.7.4 \
    dicttoxml==1.7.16 \
    docker==6.1.3 \
    docker-compose==1.29.2 \
    grpcio==1.60.0 \
    grpcio-tools==1.60.0 \
    kafka-python==2.0.2 \
    lz4==4.3.3 \
    minio==7.2.3 \
    nats-py==2.6.0 \
    protobuf==4.25.2 \
    kazoo==2.9.0 \
    psycopg2-binary==2.9.6 \
    pyhdfs==0.3.1 \
    pymongo==3.11.0 \
    pyspark==3.3.2 \
    pytest==7.4.4 \
    pytest-order==1.0.0 \
    pytest-random==0.2 \
    pytest-repeat==0.9.3 \
    pytest-timeout==2.2.0 \
    pytest-xdist==3.5.0 \
    pytest-reportlog==0.4.0 \
    pytz==2023.3.post1 \
    pyyaml==5.3.1 \
    redis==5.0.1 \
    requests-kerberos==0.14.0 \
    tzlocal==2.1 \
    retry==0.9.2 \
    bs4==0.0.2 \
    lxml==5.1.0 \
    urllib3==2.0.7 \
    jwcrypto==1.5.6
# bs4, lxml are for cloud tests, do not delete
COPY requirements.txt /
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Hudi supports only spark 3.3.*, not 3.4
RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \
113 docker/test/integration/runner/requirements.txt Normal file
@@ -0,0 +1,113 @@
PyHDFS==0.3.1
PyJWT==2.3.0
PyMySQL==1.1.0
PyNaCl==1.5.0
PyYAML==5.3.1
SecretStorage==3.3.1
argon2-cffi-bindings==21.2.0
argon2-cffi==23.1.0
async-timeout==4.0.3
asyncio==3.4.3
attrs==23.2.0
avro==1.10.2
azure-core==1.30.1
azure-storage-blob==12.19.0
bcrypt==4.1.3
beautifulsoup4==4.12.3
blinker==1.4
boto3==1.34.24
botocore==1.34.101
bs4==0.0.2
cassandra-driver==3.29.0
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
confluent-kafka==2.3.0
cryptography==3.4.8
dbus-python==1.2.18
decorator==5.1.1
delta-spark==2.3.0
dict2xml==1.7.4
dicttoxml==1.7.16
distro-info==1.1+ubuntu0.2
distro==1.7.0
docker-compose==1.29.2
docker==6.1.3
dockerpty==0.4.1
docopt==0.6.2
exceptiongroup==1.2.1
execnet==2.1.1
geomet==0.2.1.post1
grpcio-tools==1.60.0
grpcio==1.60.0
gssapi==1.8.3
httplib2==0.20.2
idna==3.7
importlib-metadata==4.6.4
iniconfig==2.0.0
isodate==0.6.1
jeepney==0.7.1
jmespath==1.0.1
jsonschema==3.2.0
jwcrypto==1.5.6
kafka-python==2.0.2
kazoo==2.9.0
keyring==23.5.0
krb5==0.5.1
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
lxml==5.1.0
lz4==4.3.3
minio==7.2.3
more-itertools==8.10.0
nats-py==2.6.0
oauthlib==3.2.0
packaging==24.0
paramiko==3.4.0
pika==1.2.0
pip==24.1.1
pipdeptree==2.23.0
pluggy==1.5.0
protobuf==4.25.2
psycopg2-binary==2.9.6
py4j==0.10.9.5
py==1.11.0
pycparser==2.22
pycryptodome==3.20.0
pymongo==3.11.0
pyparsing==2.4.7
pyrsistent==0.20.0
pyspark==3.3.2
pyspnego==0.10.2
pytest-order==1.0.0
pytest-random==0.2
pytest-repeat==0.9.3
pytest-reportlog==0.4.0
pytest-timeout==2.2.0
pytest-xdist==3.5.0
pytest==7.4.4
python-apt==2.4.0+ubuntu3
python-dateutil==2.9.0.post0
python-dotenv==0.21.1
pytz==2023.3.post1
redis==5.0.1
requests-kerberos==0.14.0
requests==2.31.0
retry==0.9.2
s3transfer==0.10.1
setuptools==59.6.0
simplejson==3.19.2
six==1.16.0
soupsieve==2.5
texttable==1.7.0
tomli==2.0.1
typing_extensions==4.11.0
tzlocal==2.1
unattended-upgrades==0.1
urllib3==2.0.7
wadllib==1.3.6
websocket-client==0.59.0
wheel==0.37.1
zipp==1.0.0
@@ -1,3 +1,4 @@
# docker build -t clickhouse/libfuzzer .
ARG FROM_TAG=latest
FROM clickhouse/test-base:$FROM_TAG

@@ -29,7 +30,8 @@ RUN apt-get update \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

RUN pip3 install Jinja2
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt

COPY * /
27 docker/test/libfuzzer/requirements.txt Normal file
@@ -0,0 +1,27 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
Jinja2==3.1.4
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
MarkupSafe==2.1.5
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0
@@ -23,7 +23,6 @@ RUN apt-get update \
    python3 \
    python3-dev \
    python3-pip \
    python3-setuptools \
    rsync \
    tree \
    tzdata \
@@ -33,12 +32,14 @@ RUN apt-get update \
    cargo \
    ripgrep \
    zstd \
    && pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \
    && apt-get purge --yes python3-dev g++ \
    && apt-get autoremove --yes \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

COPY requirements.txt /
RUN pip3 --no-cache-dir install -r requirements.txt

COPY run.sh /

CMD ["bash", "/run.sh"]
32 docker/test/performance-comparison/requirements.txt Normal file
@@ -0,0 +1,32 @@
blinker==1.4
clickhouse-driver==0.2.7
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
numpy==1.26.3
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
Pygments==2.11.2
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
pytz==2023.4
PyYAML==6.0.1
scipy==1.12.0
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
tzlocal==2.1
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0
@@ -18,11 +18,8 @@ RUN apt-get update --yes \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

RUN pip3 install \
    numpy \
    pyodbc \
    deepdiff \
    sqlglot
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt

ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz"
30 docker/test/sqllogic/requirements.txt Normal file
@@ -0,0 +1,30 @@
blinker==1.4
cryptography==3.4.8
dbus-python==1.2.18
deepdiff==7.0.1
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
numpy==1.26.4
oauthlib==3.2.0
ordered-set==4.1.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyodbc==5.1.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
sqlglot==23.16.0
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0
@@ -14,9 +14,8 @@ RUN apt-get update --yes \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

RUN pip3 install \
    pyyaml \
    clickhouse-driver
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt

ARG sqltest_repo="https://github.com/elliotchance/sqltest/"
29 docker/test/sqltest/requirements.txt Normal file
@@ -0,0 +1,29 @@
blinker==1.4
clickhouse-driver==0.2.7
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
httplib2==0.20.2
importlib-metadata==4.6.4
jeepney==0.7.1
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
more-itertools==8.10.0
oauthlib==3.2.0
packaging==24.1
pip==24.1.1
pipdeptree==2.23.0
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
pytz==2024.1
PyYAML==6.0.1
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
tzlocal==5.2
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0
@@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG
RUN apt-get update -y \
    && env DEBIAN_FRONTEND=noninteractive \
    apt-get install --yes --no-install-recommends \
    python3-requests \
    nodejs \
    npm \
    && apt-get clean \
@@ -25,10 +25,7 @@ RUN apt-get update -y \
    openssl \
    postgresql-client \
    python3 \
    python3-lxml \
    python3-pip \
    python3-requests \
    python3-termcolor \
    qemu-user-static \
    sqlite3 \
    sudo \
@@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR
    && unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \
    && rm protoc-${PROTOC_VERSION}-linux-x86_64.zip

RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt

RUN mkdir -p /tmp/clickhouse-odbc-tmp \
    && cd /tmp/clickhouse-odbc-tmp \
51 docker/test/stateless/requirements.txt Normal file
@@ -0,0 +1,51 @@
awscli==1.22.34
blinker==1.4
botocore==1.23.34
certifi==2020.6.20
chardet==4.0.0
colorama==0.4.4
cryptography==3.4.8
dbus-python==1.2.18
distro==1.7.0
docutils==0.17.1
gyp==0.1
httplib2==0.20.2
idna==3.3
importlib-metadata==4.6.4
jeepney==0.7.1
Jinja2==3.1.3
jmespath==0.10.0
keyring==23.5.0
launchpadlib==1.10.16
lazr.restfulclient==0.14.4
lazr.uri==1.0.6
lxml==4.8.0
MarkupSafe==2.1.5
more-itertools==8.10.0
numpy==1.26.3
oauthlib==3.2.0
packaging==24.1
pandas==1.5.3
pip==24.1.1
pipdeptree==2.23.0
pyarrow==15.0.0
pyasn1==0.4.8
PyJWT==2.3.0
pyparsing==2.4.7
python-apt==2.4.0+ubuntu3
python-dateutil==2.8.1
pytz==2024.1
PyYAML==6.0.1
requests==2.32.3
roman==3.3
rsa==4.8
s3transfer==0.5.0
scipy==1.12.0
SecretStorage==3.3.1
setuptools==59.6.0
six==1.16.0
termcolor==1.1.0
urllib3==1.26.5
wadllib==1.3.6
wheel==0.37.1
zipp==1.0.0
@@ -6,6 +6,9 @@ source /setup_export_logs.sh
# fail on errors, verbose and export all env variables
set -e -x -a

MAX_RUN_TIME=${MAX_RUN_TIME:-10800}
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME))

# Choose random timezone for this test run.
#
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
@@ -262,14 +265,17 @@ function run_tests()

export -f run_tests


# This should be enough to setup job and collect artifacts
TIMEOUT=$((MAX_RUN_TIME - 300))
if [ "$NUM_TRIES" -gt "1" ]; then
    # We don't run tests with Ordinary database in PRs, only in master.
    # So run new/changed tests with Ordinary at least once in flaky check.
    timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
    timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
        | sed 's/All tests have finished//' | sed 's/No tests were run//' ||:
fi

timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||:
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:

echo "Files in current directory"
ls -la ./
@@ -38,7 +38,7 @@ function fn_exists() {
function timeout_with_logging() {
    local exit_code=0

    timeout "${@}" || exit_code="${?}"
    timeout -s TERM --preserve-status "${@}" || exit_code="${?}"

    if [[ "${exit_code}" -eq "124" ]]
    then
@@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

# python-magic is the same version as in Ubuntu 22.04
RUN pip3 install \
    PyGithub \
    black==23.12.0 \
    boto3 \
    codespell==2.2.1 \
    mypy==1.8.0 \
    pylint==3.1.0 \
    python-magic==0.4.24 \
    flake8==4.0.1 \
    requests \
    thefuzz \
    tqdm==4.66.4 \
    types-requests \
    unidiff \
    jwt \
    && rm -rf /root/.cache/pip
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r requirements.txt

RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8
58 docker/test/style/requirements.txt Normal file
@@ -0,0 +1,58 @@
aiohttp==3.9.5
aiosignal==1.3.1
astroid==3.1.0
async-timeout==4.0.3
attrs==23.2.0
black==23.12.0
boto3==1.34.131
botocore==1.34.131
certifi==2024.6.2
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
codespell==2.2.1
cryptography==42.0.8
Deprecated==1.2.14
dill==0.3.8
flake8==4.0.1
frozenlist==1.4.1
idna==3.7
isort==5.13.2
jmespath==1.0.1
jwt==1.3.1
mccabe==0.6.1
multidict==6.0.5
mypy==1.8.0
mypy-extensions==1.0.0
packaging==24.1
pathspec==0.9.0
pip==24.1.1
pipdeptree==2.23.0
platformdirs==4.2.2
pycodestyle==2.8.0
pycparser==2.22
pyflakes==2.4.0
PyGithub==2.3.0
PyJWT==2.8.0
pylint==3.1.0
PyNaCl==1.5.0
python-dateutil==2.9.0.post0
python-magic==0.4.24
PyYAML==6.0.1
rapidfuzz==3.9.3
requests==2.32.3
s3transfer==0.10.1
setuptools==59.6.0
six==1.16.0
thefuzz==0.22.1
tomli==2.0.1
tomlkit==0.12.5
tqdm==4.66.4
types-requests==2.32.0.20240622
typing_extensions==4.12.2
unidiff==0.7.5
urllib3==2.2.2
wheel==0.37.1
wrapt==1.16.0
yamllint==1.26.3
yarl==1.9.4
26 docs/changelogs/v24.6.2.17-stable.md Normal file
@@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable (dcced7c8478)

#### New Feature
* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)).

#### Improvement
* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).

#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)).
@@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
- `password` for the file on disk
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
- `use_same_s3_credentials_for_base_backup`: whether base backup to S3 should inherit credentials from the query. Only works with `S3`.
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
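For illustration only (not part of this diff): a minimal sketch of how the newly documented `storage_policy` setting might be used; the table name and the policy name `policy_with_local_and_s3` are hypothetical and would have to exist in the server configuration.

```sql
-- Restore a MergeTree table from a disk backup and place its data
-- according to a storage policy defined on the target server (hypothetical names).
RESTORE TABLE data.events FROM Disk('backups', '1.zip')
SETTINGS storage_policy = 'policy_with_local_and_s3';
```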
@@ -974,6 +974,13 @@ Default value: false

- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting

## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization}

Enables compact mode for binary serialization of discriminators in the Variant data type.
This mode uses significantly less memory for storing discriminators in parts when there is mostly one variant or a lot of NULL values.

Default value: true
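As a hedged illustration (not taken from the commit) of where such a MergeTree-level setting is typically applied, assuming a hypothetical table `events` and that the experimental `Variant` type is enabled for the session:

```sql
-- Variant is experimental at this version; enable it for the session first.
SET allow_experimental_variant_type = 1;

CREATE TABLE events
(
    id UInt64,
    payload Variant(UInt64, String)
)
ENGINE = MergeTree
ORDER BY id
SETTINGS use_compact_variant_discriminators_serialization = 1;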
## merge_workload

Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead.
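An illustrative sketch (not part of the diff): like other MergeTree settings, `merge_workload` can presumably be changed on an existing table; the workload name `production_merges` is hypothetical.

```sql
ALTER TABLE events MODIFY SETTING merge_workload = 'production_merges';
```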
@@ -0,0 +1,37 @@
---
slug: /en/sql-reference/aggregate-functions/reference/aggthrow
sidebar_position: 101
---

# aggThrow

This function can be used for the purpose of testing exception safety. It will throw an exception on creation with the specified probability.

**Syntax**

```sql
aggThrow(throw_prob)
```

**Arguments**

- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md).

**Returned value**

- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`.

**Example**

Query:

```sql
SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even;
```

Result:

```response
Received exception:
Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW)
```
@@ -43,6 +43,7 @@ Standard aggregate functions:

ClickHouse-specific aggregate functions:

- [aggThrow](../reference/aggthrow.md)
- [analysisOfVariance](../reference/analysis_of_variance.md)
- [any](../reference/any_respect_nulls.md)
- [anyHeavy](../reference/anyheavy.md)
@@ -83,7 +83,57 @@ Result:
```
## makeDate32

Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md).
Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, month, day (or optionally a year and a day).

**Syntax**

```sql
makeDate32(year, [month,] day)
```

**Arguments**

- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `month` — Month (optional). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).

:::note
If `month` is omitted then `day` should take a value between `1` and `365`, otherwise it should take a value between `1` and `31`.
:::

**Returned values**

- A date created from the arguments. [Date32](../../sql-reference/data-types/date32.md).

**Examples**

Create a date from a year, month, and day:

Query:

```sql
SELECT makeDate32(2024, 1, 1);
```

Result:

```response
2024-01-01
```

Create a Date from a year and day of year:

Query:

``` sql
SELECT makeDate32(2024, 100);
```

Result:

```response
2024-04-09
```

## makeDateTime
@@ -125,12 +175,38 @@ Result:

## makeDateTime64

Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md).
Creates a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components: year, month, day, hour, minute, second. With optional sub-second precision.

**Syntax**

```sql
makeDateTime64(year, month, day, hour, minute, second[, precision])
```

**Arguments**

- `year` — Year (0-9999). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `precision` — Optional precision of the sub-second component (0-9). [Integer](../../sql-reference/data-types/int-uint.md).

**Returned value**

- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md).

**Example**

``` sql
makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5);
```

```response
┌─makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5)─┐
│                        2023-05-15 10:30:45.00779 │
└─────────────────────────────────────────────────┘
```

## timestamp
@@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server.
fqdn();
```

This function is case-insensitive.
Aliases: `fullHostName`, `FQDN`.

**Returned value**
@@ -6,44 +6,122 @@ sidebar_label: Time Window

# Time Window Functions

Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below:
Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with [WindowView](../statements/create/view.md/#window-view-experimental) are listed below:

## tumble

A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`).

**Syntax**

``` sql
tumble(time_attr, interval [, timezone])
```

**Arguments**
- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type.
- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type.
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

**Returned values**

- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`.
- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).

**Example**

Query:

``` sql
SELECT tumble(now(), toIntervalDay('1'))
SELECT tumble(now(), toIntervalDay('1'));
```

Result:

``` text
┌─tumble(now(), toIntervalDay('1'))─────────────┐
│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │
│ ('2024-07-04 00:00:00','2024-07-05 00:00:00') │
└───────────────────────────────────────────────┘
```

## tumbleStart

Returns the inclusive lower bound of the corresponding [tumbling window](#tumble).

**Syntax**

``` sql
tumbleStart(time_attr, interval [, timezone]);
```

**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).

**Returned values**

- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).

**Example**

Query:

```sql
SELECT tumbleStart(now(), toIntervalDay('1'));
```

Result:

```response
┌─tumbleStart(now(), toIntervalDay('1'))─┐
│                    2024-07-04 00:00:00 │
└────────────────────────────────────────┘
```

## tumbleEnd

Returns the exclusive upper bound of the corresponding [tumbling window](#tumble).

**Syntax**

``` sql
tumbleEnd(time_attr, interval [, timezone]);
```

**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).

**Returned values**

- The exclusive upper bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).

**Example**

Query:

```sql
SELECT tumbleEnd(now(), toIntervalDay('1'));
```

Result:

```response
┌─tumbleEnd(now(), toIntervalDay('1'))─┐
│                  2024-07-05 00:00:00 │
└──────────────────────────────────────┘
```

## hop

A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows.
A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows.

``` sql
hop(time_attr, hop_interval, window_interval [, timezone])
@@ -51,65 +129,118 @@ hop(time_attr, hop_interval, window_interval [, timezone])

**Arguments**

- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type.
- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number.
- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number.
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

**Returned values**

- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).
- The inclusive lower and exclusive upper bound of the corresponding hopping window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).

:::note
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
:::

**Example**

Query:

``` sql
SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND)
SELECT hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
```

Result:

``` text
┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐
│ ('2020-01-14 16:58:22','2020-01-14 16:58:24')             │
└───────────────────────────────────────────────────────────┘
```

## tumbleStart

Returns the inclusive lower bound of the corresponding tumbling window.

``` sql
tumbleStart(bounds_tuple);
tumbleStart(time_attr, interval [, timezone]);
```

## tumbleEnd

Returns the exclusive upper bound of the corresponding tumbling window.

``` sql
tumbleEnd(bounds_tuple);
tumbleEnd(time_attr, interval [, timezone]);
┌─hop(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
│ ('2024-07-03 00:00:00','2024-07-05 00:00:00')      │
└────────────────────────────────────────────────────┘
```

## hopStart

Returns the inclusive lower bound of the corresponding hopping window.
Returns the inclusive lower bound of the corresponding [hopping window](#hop).

**Syntax**

``` sql
hopStart(bounds_tuple);
hopStart(time_attr, hop_interval, window_interval [, timezone]);
```
**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).

**Returned values**

- The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).

:::note
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
:::

**Example**

Query:

``` sql
SELECT hopStart(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
```

Result:

``` text
┌─hopStart(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
│                                      2024-07-03 00:00:00 │
└─────────────────────────────────────────────────────────┘
```

## hopEnd

Returns the exclusive upper bound of the corresponding hopping window.
Returns the exclusive upper bound of the corresponding [hopping window](#hop).

**Syntax**

``` sql
hopEnd(bounds_tuple);
hopEnd(time_attr, hop_interval, window_interval [, timezone]);
```
**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).

**Returned values**

- The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).

:::note
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
:::

**Example**

Query:

``` sql
SELECT hopEnd(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
```

Result:

``` text
┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
│                                    2024-07-05 00:00:00 │
└───────────────────────────────────────────────────────┘

```

## Related content
@@ -23,6 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
| `GROUPS` frame | ❌ |
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
| `rank()`, `dense_rank()`, `row_number()` | ✅ |
| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`|
| `lag/lead(value, offset)` | ❌ <br/> You can use one of the following workarounds:<br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` (see the sketch after this table) |
| ntile(buckets) | ✅ <br/> Specify a window like `(partition by x order by y rows between unbounded preceding and unbounded following)`. |
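To make the `lag/lead` workaround from the table above concrete, here is a hedged sketch using `lagInFrame` with an explicit frame; the table `readings` and its columns are hypothetical.

```sql
-- Emulate lag(value, 1): lagInFrame respects the frame, so the frame
-- must span the whole partition to match lag/lead semantics.
SELECT
    time,
    value,
    lagInFrame(value, 1) OVER (
        PARTITION BY sensor_id
        ORDER BY time
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS previous_value
FROM readings
ORDER BY sensor_id, time;
```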
@@ -626,6 +626,28 @@ static void initializeAzureSDKLogger(
#endif
}

#if defined(SANITIZER)
static std::vector<String> getSanitizerNames()
{
    std::vector<String> names;

#if defined(ADDRESS_SANITIZER)
    names.push_back("address");
#endif
#if defined(THREAD_SANITIZER)
    names.push_back("thread");
#endif
#if defined(MEMORY_SANITIZER)
    names.push_back("memory");
#endif
#if defined(UNDEFINED_BEHAVIOR_SANITIZER)
    names.push_back("undefined behavior");
#endif

    return names;
}
#endif

int Server::main(const std::vector<std::string> & /*args*/)
try
{
@@ -716,7 +738,17 @@ try
    global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");

#if defined(SANITIZER)
    global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
    auto sanitizers = getSanitizerNames();

    String log_message;
    if (sanitizers.empty())
        log_message = "sanitizer";
    else if (sanitizers.size() == 1)
        log_message = fmt::format("{} sanitizer", sanitizers.front());
    else
        log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", "));

    global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message));
#endif

#if defined(SANITIZE_COVERAGE) || WITH_COVERAGE
@@ -1093,10 +1093,4 @@ void ColumnObject::finalize()
    checkObjectHasNoAmbiguosPaths(getKeys());
}

void ColumnObject::updateHashFast(SipHash & hash) const
{
    for (const auto & entry : subcolumns)
        for (auto & part : entry->data.data)
            part->updateHashFast(hash);
}
}
@@ -242,7 +242,7 @@ public:
    const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
    void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
    void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
    void updateHashFast(SipHash & hash) const override;
    void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
    void expand(const Filter &, bool) override { throwMustBeConcrete(); }
    bool hasEqualValues() const override { throwMustBeConcrete(); }
    size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); }
@@ -711,7 +711,13 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c
ColumnPtr ColumnTuple::compress() const
{
    if (columns.empty())
        return Ptr();
    {
        return ColumnCompressed::create(size(), 0,
            [n = column_length]
            {
                return ColumnTuple::create(n);
            });
    }

    size_t byte_size = 0;
    Columns compressed;
@@ -5,7 +5,7 @@
namespace DB
{

static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2)
static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
{
    if ((end - pos) & 1)
    {
@@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o
    ++out;
}

static void inline binStringDecode(const char * pos, const char * end, char *& out)
static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
{
    if (pos == end)
    {
@@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o
        ++out;
    }

    assert((end - pos) % 8 == 0);
    chassert((end - pos) % word_size == 0);

    while (end - pos != 0)
    {
@ -1,184 +0,0 @@
#pragma once

#include <base/defines.h>

#include <Common/Exception.h>

#include <algorithm>
#include <memory>
#include <typeindex>
#include <vector>
#include <string>


namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

/* This is a collection of objects derived from ItemBase.
 * Collection contains no more than one instance for each derived type.
 * The derived type is used to access the instance.
 */

template<class ItemBase>
class CollectionOfDerivedItems
{
public:
    using Self = CollectionOfDerivedItems<ItemBase>;
    using ItemPtr = std::shared_ptr<ItemBase>;

private:
    struct Rec
    {
        std::type_index type_idx;
        ItemPtr ptr;

        bool operator<(const Rec & other) const
        {
            return type_idx < other.type_idx;
        }

        bool operator<(const std::type_index & value) const
        {
            return type_idx < value;
        }

        bool operator==(const Rec & other) const
        {
            return type_idx == other.type_idx;
        }
    };
    using Records = std::vector<Rec>;

public:
    void swap(Self & other) noexcept
    {
        records.swap(other.records);
    }

    void clear()
    {
        records.clear();
    }

    bool empty() const
    {
        return records.empty();
    }

    size_t size() const
    {
        return records.size();
    }

    Self clone() const
    {
        Self result;
        result.records.reserve(records.size());
        for (const auto & rec : records)
            result.records.emplace_back(rec.type_idx, rec.ptr->clone());
        return result;
    }

    void append(Self && other)
    {
        auto middle_idx = records.size();
        std::move(other.records.begin(), other.records.end(), std::back_inserter(records));
        std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end());
        chassert(isUniqTypes());
    }

    template <class T>
    void add(std::shared_ptr<T> info)
    {
        static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
        return addImpl(std::type_index(typeid(T)), std::move(info));
    }

    template <class T>
    std::shared_ptr<T> get() const
    {
        static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
        auto it = getImpl(std::type_index(typeid(T)));
        if (it == records.cend())
            return nullptr;
        auto cast = std::dynamic_pointer_cast<T>(it->ptr);
        chassert(cast);
        return cast;
    }

    template <class T>
    std::shared_ptr<T> extract()
    {
        static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
        auto it = getImpl(std::type_index(typeid(T)));
        if (it == records.cend())
            return nullptr;
        auto cast = std::dynamic_pointer_cast<T>(it->ptr);
        chassert(cast);

        records.erase(it);
        return cast;
    }

    std::string debug() const
    {
        std::string result;

        for (auto & rec : records)
        {
            result.append(rec.type_idx.name());
            result.append(" ");
        }

        return result;
    }

private:
    bool isUniqTypes() const
    {
        auto uniq_it = std::adjacent_find(records.begin(), records.end());

        return uniq_it == records.end();
    }

    void addImpl(std::type_index type_idx, ItemPtr item)
    {
        auto it = std::lower_bound(records.begin(), records.end(), type_idx);

        if (it == records.end())
        {
            records.emplace_back(type_idx, item);
            return;
        }

        if (it->type_idx == type_idx)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name());


        records.emplace(it, type_idx, item);

        chassert(isUniqTypes());
    }

    Records::const_iterator getImpl(std::type_index type_idx) const
    {
        auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx);

        if (it == records.cend())
            return records.cend();

        if (it->type_idx != type_idx)
            return records.cend();

        return it;
    }

    Records records;
};

}
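The removed header above implements a small "one instance per derived type" container. Below is a self-contained sketch of the same idea (a sorted std::vector keyed by std::type_index with binary search on lookup); it is not the ClickHouse class, and the item types TraceInfo and TokenInfo are made up for illustration.

#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
#include <typeindex>
#include <utility>
#include <vector>

struct ItemBase { virtual ~ItemBase() = default; };
struct TraceInfo : ItemBase { int trace_id = 42; };
struct TokenInfo : ItemBase { std::string token = "abc"; };

class MiniCollection
{
public:
    template <class T>
    void add(std::shared_ptr<T> item)
    {
        std::type_index idx(typeid(T));
        auto it = std::lower_bound(records.begin(), records.end(), idx,
            [](const Rec & rec, const std::type_index & value) { return rec.first < value; });
        records.insert(it, {idx, std::move(item)});   // the real class rejects duplicates here
    }

    template <class T>
    std::shared_ptr<T> get() const
    {
        std::type_index idx(typeid(T));
        auto it = std::lower_bound(records.begin(), records.end(), idx,
            [](const Rec & rec, const std::type_index & value) { return rec.first < value; });
        if (it == records.end() || it->first != idx)
            return nullptr;
        return std::dynamic_pointer_cast<T>(it->second);
    }

private:
    using Rec = std::pair<std::type_index, std::shared_ptr<ItemBase>>;
    std::vector<Rec> records;
};

int main()
{
    MiniCollection collection;
    collection.add(std::make_shared<TraceInfo>());
    collection.add(std::make_shared<TokenInfo>());

    if (auto token = collection.get<TokenInfo>())
        std::cout << "token: " << token->token << "\n";                          // token: abc
    std::cout << "has TraceInfo: " << (collection.get<TraceInfo>() != nullptr) << "\n";   // has TraceInfo: 1
}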
@ -36,7 +36,7 @@ class IColumn;
    M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\
    M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \
    M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \
    M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \
    M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \
    M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \
    M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
    M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
@ -634,8 +634,9 @@ class IColumn;
    M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \
    M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
    M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \
    M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \
    M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
    M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \
    M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \
    M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
    M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
    M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
@ -953,7 +954,6 @@ class IColumn;

#define OBSOLETE_SETTINGS(M, ALIAS) \
    /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
    MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \
    MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \
    MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \
    MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \
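The settings lists above rely on an X-macro: each M(...) entry is expanded differently depending on how M is defined at the point of use. A minimal standalone sketch of that pattern (using a 4-argument M instead of ClickHouse's real signature) looks like this.

#include <iostream>

#define APPLY_FOR_MINI_SETTINGS(M) \
    M(int, max_block_size, 65409, "Maximum block size for reading") \
    M(bool, normalize_function_names, true, "Normalize function names to their canonical names")

// One expansion: declare members with defaults.
#define DECLARE_MEMBER(TYPE, NAME, DEFAULT, DESCRIPTION) TYPE NAME = DEFAULT;
struct MiniSettings { APPLY_FOR_MINI_SETTINGS(DECLARE_MEMBER) };

// Another expansion of the same list: print documentation.
#define PRINT_DOC(TYPE, NAME, DEFAULT, DESCRIPTION) std::cout << #NAME << ": " << DESCRIPTION << "\n";

int main()
{
    MiniSettings settings;
    std::cout << settings.max_block_size << "\n";   // 65409
    APPLY_FOR_MINI_SETTINGS(PRINT_DOC)
}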
@ -2,9 +2,11 @@
#include <DataTypes/Serializations/SerializationDynamic.h>
#include <DataTypes/Serializations/SerializationDynamicElement.h>
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnDynamic.h>
#include <Columns/ColumnVariant.h>
#include <Core/Field.h>
@ -110,28 +112,58 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnDa
    }

    /// Extract nested subcolumn of requested dynamic subcolumn if needed.
    if (!subcolumn_nested_name.empty())
    /// If requested subcolumn is null map, it's processed separately as there is no Nullable type yet.
    bool is_null_map_subcolumn = subcolumn_nested_name == "null";
    if (is_null_map_subcolumn)
    {
        res->type = std::make_shared<DataTypeUInt8>();
    }
    else if (!subcolumn_nested_name.empty())
    {
        res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null);
        if (!res)
            return nullptr;
    }

    res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName());
    res->type = makeNullableOrLowCardinalityNullableSafe(res->type);
    res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn);
    /// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()).
    bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality();
    if (!is_null_map_subcolumn && make_subcolumn_nullable)
        res->type = makeNullableOrLowCardinalityNullableSafe(res->type);

    if (data.column)
    {
        if (discriminator)
        {
            /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to
            /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to
            /// create full subcolumn from variant according to discriminators.
            const auto & variant_column = assert_cast<const ColumnDynamic &>(*data.column).getVariantColumn();
            auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator));
            res->column = creator.create(res->column);
            std::unique_ptr<ISerialization::ISubcolumnCreator> creator;
            if (is_null_map_subcolumn)
                creator = std::make_unique<SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator>(
                    variant_column.getLocalDiscriminatorsPtr(),
                    "",
                    *discriminator,
                    variant_column.localDiscriminatorByGlobal(*discriminator));
            else
                creator = std::make_unique<SerializationVariantElement::VariantSubcolumnCreator>(
                    variant_column.getLocalDiscriminatorsPtr(),
                    "",
                    *discriminator,
                    variant_column.localDiscriminatorByGlobal(*discriminator),
                    make_subcolumn_nullable);
            res->column = creator->create(res->column);
        }
        /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values.
        else if (is_null_map_subcolumn)
        {
            /// Fill null map with 1 when there is no such Dynamic subcolumn.
            auto column = ColumnUInt8::create();
            assert_cast<ColumnUInt8 &>(*column).getData().resize_fill(data.column->size(), 1);
            res->column = std::move(column);
        }
        else
        {
            /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values.
            auto column = res->type->createColumn();
            column->insertManyDefaults(data.column->size());
            res->column = std::move(column);
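The null-map branches above boil down to a simple rule: rows whose discriminator matches the requested type get 0 in the ".null" subcolumn, all other rows (other variants or NULLs) get 1, and a type that is absent from the column yields an all-ones map. Below is a standalone sketch of that rule (not ClickHouse code; NULL_DISCRIMINATOR is assumed to be 255 here purely for illustration).

#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

using Discriminator = uint8_t;
constexpr Discriminator NULL_DISCRIMINATOR = 255;   // assumed sentinel for this sketch

std::vector<uint8_t> makeNullMap(const std::vector<Discriminator> & discriminators,
                                 std::optional<Discriminator> requested)
{
    // If the Dynamic column has no variant of the requested type, the whole map is 1.
    if (!requested)
        return std::vector<uint8_t>(discriminators.size(), 1);

    std::vector<uint8_t> null_map(discriminators.size(), 1);
    for (size_t i = 0; i < discriminators.size(); ++i)
        if (discriminators[i] == *requested)
            null_map[i] = 0;   // this row holds the requested variant
    return null_map;
}

int main()
{
    // Rows: Int64 value, NULL, String value, Int64 value (discriminators 0, NULL, 1, 0).
    std::vector<Discriminator> discrs = {0, NULL_DISCRIMINATOR, 1, 0};
    for (auto v : makeNullMap(discrs, Discriminator{0}))
        std::cout << int(v) << ' ';           // prints: 0 1 1 0
    std::cout << '\n';
    for (auto v : makeNullMap(discrs, std::nullopt))
        std::cout << int(v) << ' ';           // prints: 1 1 1 1
    std::cout << '\n';
}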
@ -173,7 +173,7 @@ bool IDataType::hasDynamicSubcolumns() const
    auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
    auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data)
    {
        has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData();
        has_dynamic_subcolumns |= subcolumn_data.type && subcolumn_data.type->hasDynamicSubcolumnsData();
    };
    forEachSubcolumn(callback, data);
    return has_dynamic_subcolumns;
@ -64,6 +64,9 @@ String ISerialization::Substream::toString() const
|
||||
if (type == VariantElement)
|
||||
return fmt::format("VariantElement({})", variant_element_name);
|
||||
|
||||
if (type == VariantElementNullMap)
|
||||
return fmt::format("VariantElementNullMap({}.null)", variant_element_name);
|
||||
|
||||
return String(magic_enum::enum_name(type));
|
||||
}
|
||||
|
||||
@ -195,6 +198,8 @@ String getNameForSubstreamPath(
|
||||
stream_name += ".variant_offsets";
|
||||
else if (it->type == Substream::VariantElement)
|
||||
stream_name += "." + it->variant_element_name;
|
||||
else if (it->type == Substream::VariantElementNullMap)
|
||||
stream_name += "." + it->variant_element_name + ".null";
|
||||
else if (it->type == SubstreamType::DynamicStructure)
|
||||
stream_name += ".dynamic_structure";
|
||||
}
|
||||
@ -395,7 +400,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref
|
||||
return path[last_elem].type == Substream::NullMap
|
||||
|| path[last_elem].type == Substream::TupleElement
|
||||
|| path[last_elem].type == Substream::ArraySizes
|
||||
|| path[last_elem].type == Substream::VariantElement;
|
||||
|| path[last_elem].type == Substream::VariantElement
|
||||
|| path[last_elem].type == Substream::VariantElementNullMap;
|
||||
}
|
||||
|
||||
ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)
|
||||
|
@ -184,6 +184,7 @@ public:
|
||||
VariantOffsets,
|
||||
VariantElements,
|
||||
VariantElement,
|
||||
VariantElementNullMap,
|
||||
|
||||
DynamicData,
|
||||
DynamicStructure,
|
||||
@ -256,6 +257,8 @@ public:
|
||||
|
||||
bool position_independent_encoding = true;
|
||||
|
||||
bool use_compact_variant_discriminators_serialization = false;
|
||||
|
||||
enum class DynamicStatisticsMode
|
||||
{
|
||||
NONE, /// Don't write statistics.
|
||||
@ -434,6 +437,9 @@ protected:
|
||||
template <typename State, typename StatePtr>
|
||||
State * checkAndGetState(const StatePtr & state) const;
|
||||
|
||||
template <typename State, typename StatePtr>
|
||||
static State * checkAndGetState(const StatePtr & state, const ISerialization * serialization);
|
||||
|
||||
[[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const;
|
||||
};
|
||||
|
||||
@ -444,10 +450,16 @@ using SubstreamType = ISerialization::Substream::Type;
|
||||
|
||||
template <typename State, typename StatePtr>
|
||||
State * ISerialization::checkAndGetState(const StatePtr & state) const
|
||||
{
|
||||
return checkAndGetState<State, StatePtr>(state, this);
|
||||
}
|
||||
|
||||
template <typename State, typename StatePtr>
|
||||
State * ISerialization::checkAndGetState(const StatePtr & state, const ISerialization * serialization)
|
||||
{
|
||||
if (!state)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Got empty state for {}", demangle(typeid(*this).name()));
|
||||
"Got empty state for {}", demangle(typeid(*serialization).name()));
|
||||
|
||||
auto * state_concrete = typeid_cast<State *>(state.get());
|
||||
if (!state_concrete)
|
||||
@ -455,7 +467,7 @@ State * ISerialization::checkAndGetState(const StatePtr & state) const
|
||||
auto & state_ref = *state;
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Invalid State for {}. Expected: {}, got {}",
|
||||
demangle(typeid(*this).name()),
|
||||
demangle(typeid(*serialization).name()),
|
||||
demangle(typeid(State).name()),
|
||||
demangle(typeid(state_ref).name()));
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <DataTypes/Serializations/SerializationDynamicElement.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||
#include <DataTypes/Serializations/SerializationDynamic.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
@ -77,7 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix(
|
||||
if (auto global_discr = assert_cast<const DataTypeVariant &>(*variant_type).tryGetVariantDiscriminator(dynamic_element_name))
|
||||
{
|
||||
settings.path.push_back(Substream::DynamicData);
|
||||
dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElement>(nested_serialization, dynamic_element_name, *global_discr);
|
||||
if (is_null_map_subcolumn)
|
||||
dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElementNullMap>(dynamic_element_name, *global_discr);
|
||||
else
|
||||
dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElement>(nested_serialization, dynamic_element_name, *global_discr);
|
||||
dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
@ -98,7 +102,16 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams(
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
if (!state)
|
||||
{
|
||||
if (is_null_map_subcolumn)
|
||||
{
|
||||
auto mutable_column = result_column->assumeMutable();
|
||||
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
|
||||
data.resize_fill(data.size() + limit, 1);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
auto * dynamic_element_state = checkAndGetState<DeserializeBinaryBulkStateDynamicElement>(state);
|
||||
|
||||
@ -108,6 +121,12 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams(
|
||||
dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
else if (is_null_map_subcolumn)
|
||||
{
|
||||
auto mutable_column = result_column->assumeMutable();
|
||||
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
|
||||
data.resize_fill(data.size() + limit, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto mutable_column = result_column->assumeMutable();
|
||||
|
@ -13,11 +13,11 @@ private:
|
||||
/// To be able to deserialize Dynamic element as a subcolumn
|
||||
/// we need its type name and global discriminator.
|
||||
String dynamic_element_name;
|
||||
bool is_null_map_subcolumn;
|
||||
|
||||
public:
|
||||
SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_)
|
||||
: SerializationWrapper(nested_)
|
||||
, dynamic_element_name(dynamic_element_name_)
|
||||
SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false)
|
||||
: SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <DataTypes/Serializations/SerializationVariant.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||
#include <DataTypes/Serializations/SerializationNumber.h>
|
||||
#include <DataTypes/Serializations/SerializationNullable.h>
|
||||
#include <DataTypes/Serializations/SerializationNamed.h>
|
||||
@ -30,12 +31,18 @@ namespace ErrorCodes
|
||||
|
||||
struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState
|
||||
{
|
||||
std::vector<ISerialization::SerializeBinaryBulkStatePtr> states;
|
||||
explicit SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode)
|
||||
{
|
||||
}
|
||||
|
||||
SerializationVariant::DiscriminatorsSerializationMode discriminators_mode;
|
||||
std::vector<ISerialization::SerializeBinaryBulkStatePtr> variant_states;
|
||||
};
|
||||
|
||||
struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState
|
||||
{
|
||||
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> states;
|
||||
ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
|
||||
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> variant_states;
|
||||
};
|
||||
|
||||
void SerializationVariant::enumerateStreams(
|
||||
@ -65,13 +72,19 @@ void SerializationVariant::enumerateStreams(
|
||||
|
||||
for (size_t i = 0; i < variants.size(); ++i)
|
||||
{
|
||||
settings.path.back().creator = std::make_shared<SerializationVariantElement::VariantSubcolumnCreator>(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i);
|
||||
DataTypePtr type = type_variant ? type_variant->getVariant(i) : nullptr;
|
||||
settings.path.back().creator = std::make_shared<SerializationVariantElement::VariantSubcolumnCreator>(
|
||||
local_discriminators,
|
||||
variant_names[i],
|
||||
i,
|
||||
column_variant ? column_variant->localDiscriminatorByGlobal(i) : i,
|
||||
!type || type->canBeInsideNullable() || type->lowCardinality());
|
||||
|
||||
auto variant_data = SubstreamData(variants[i])
|
||||
.withType(type_variant ? type_variant->getVariant(i) : nullptr)
|
||||
.withType(type)
|
||||
.withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr)
|
||||
.withSerializationInfo(data.serialization_info)
|
||||
.withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr);
|
||||
.withDeserializeState(variant_deserialize_state ? variant_deserialize_state->variant_states[i] : nullptr);
|
||||
|
||||
addVariantElementToPath(settings.path, i);
|
||||
settings.path.back().data = variant_data;
|
||||
@ -79,6 +92,24 @@ void SerializationVariant::enumerateStreams(
        settings.path.pop_back();
    }

    /// Variant subcolumns like variant.Type have type Nullable(Type), so we want to support reading null map subcolumn from it: variant.Type.null.
    /// Nullable column is created during deserialization of a variant subcolumn according to the discriminators, so we don't have actual Nullable
    /// serialization with null map subcolumn. To be able to read null map subcolumn from the variant subcolumn we use special serialization
    /// SerializationVariantElementNullMap.
    auto null_map_data = SubstreamData(std::make_shared<SerializationNumber<UInt8>>())
                             .withType(type_variant ? std::make_shared<DataTypeUInt8>() : nullptr)
                             .withColumn(column_variant ? ColumnUInt8::create() : nullptr);

    for (size_t i = 0; i < variants.size(); ++i)
    {
        settings.path.back().creator = std::make_shared<SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator>(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i);
        settings.path.push_back(Substream::VariantElementNullMap);
        settings.path.back().variant_element_name = variant_names[i];
        settings.path.back().data = null_map_data;
        callback(settings.path);
        settings.path.pop_back();
    }

    settings.path.pop_back();
}

@ -87,17 +118,26 @@ void SerializationVariant::serializeBinaryBulkStatePrefix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
auto * discriminators_stream = settings.getter(settings.path);
|
||||
settings.path.pop_back();
|
||||
|
||||
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>();
|
||||
variant_state->states.resize(variants.size());
|
||||
if (!discriminators_stream)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkStatePrefix");
|
||||
|
||||
UInt64 mode = settings.use_compact_variant_discriminators_serialization ? DiscriminatorsSerializationMode::COMPACT : DiscriminatorsSerializationMode::BASIC;
|
||||
writeBinaryLittleEndian(mode, *discriminators_stream);
|
||||
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>(mode);
|
||||
variant_state->variant_states.resize(variants.size());
|
||||
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
|
||||
for (size_t i = 0; i < variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]);
|
||||
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
@ -116,7 +156,7 @@ void SerializationVariant::serializeBinaryBulkStateSuffix(
|
||||
for (size_t i = 0; i < variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->states[i]);
|
||||
variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
settings.path.pop_back();
|
||||
@ -128,14 +168,19 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsDeserializeStatesCache * cache) const
|
||||
{
|
||||
DeserializeBinaryBulkStatePtr discriminators_state = deserializeDiscriminatorsStatePrefix(settings, cache);
|
||||
if (!discriminators_state)
|
||||
return;
|
||||
|
||||
auto variant_state = std::make_shared<DeserializeBinaryBulkStateVariant>();
|
||||
variant_state->states.resize(variants.size());
|
||||
variant_state->discriminators_state = discriminators_state;
|
||||
variant_state->variant_states.resize(variants.size());
|
||||
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i < variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache);
|
||||
variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
@ -143,6 +188,29 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix(
|
||||
state = std::move(variant_state);
|
||||
}
|
||||
|
||||
ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
SubstreamsDeserializeStatesCache * cache)
|
||||
{
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
|
||||
DeserializeBinaryBulkStatePtr discriminators_state = nullptr;
|
||||
if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path))
|
||||
{
|
||||
discriminators_state = cached_state;
|
||||
}
|
||||
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||
{
|
||||
UInt64 mode;
|
||||
readBinaryLittleEndian(mode, *discriminators_stream);
|
||||
discriminators_state = std::make_shared<DeserializeBinaryBulkStateVariantDiscriminators>(mode);
|
||||
addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state);
|
||||
}
|
||||
|
||||
settings.path.pop_back();
|
||||
return discriminators_state;
|
||||
}
|
||||
|
||||
|
||||
void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(
|
||||
const IColumn & column,
|
||||
@ -165,13 +233,71 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
|
||||
auto * variant_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state);
|
||||
|
||||
/// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate
|
||||
/// Don't write anything if column is empty.
|
||||
if (limit == 0)
|
||||
return;
|
||||
|
||||
/// Write number of rows in this granule in compact mode.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||
writeVarUInt(UInt64(limit), *discriminators_stream);
|
||||
|
||||
/// If the column has only one non-empty variant and no NULLs, we don't need to
/// calculate limits for variants and can use the provided offset/limit.
|
||||
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||
{
|
||||
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
|
||||
|
||||
/// In compact mode write the format of the granule and single non-empty discriminator.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||
}
|
||||
/// For basic mode just serialize this discriminator limit times.
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||
}
|
||||
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
addVariantElementToPath(settings.path, non_empty_global_discr);
|
||||
/// We can use the same offset/limit as for whole Variant column
|
||||
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]);
|
||||
variants_statistics[variant_names[non_empty_global_discr]] += limit;
|
||||
settings.path.pop_back();
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
/// If column has only NULLs, just serialize NULL discriminators.
|
||||
else if (col.hasOnlyNulls())
|
||||
{
|
||||
/// In compact mode write single NULL_DISCRIMINATOR.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
|
||||
}
|
||||
/// In basic mode write NULL_DISCRIMINATOR limit times.
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/// If offset = 0 and limit == col.size() we don't need to calculate
|
||||
/// offsets and limits for variants and need to just serialize whole columns.
|
||||
if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls())
|
||||
if ((offset == 0 && limit == col.size()))
|
||||
{
|
||||
/// First, serialize discriminators.
|
||||
/// If we have only NULLs or local and global discriminators are the same, just serialize the column as is.
|
||||
if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder())
|
||||
/// Here we are sure that column contains different discriminators, use plain granule format in compact mode.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream);
|
||||
|
||||
/// If local and global discriminators are the same, just serialize the column as is.
|
||||
if (col.hasGlobalVariantsOrder())
|
||||
{
|
||||
SerializationNumber<ColumnVariant::Discriminator>().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit);
|
||||
}
|
||||
@ -188,7 +314,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]);
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]);
|
||||
variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size();
|
||||
settings.path.pop_back();
|
||||
}
|
||||
@ -196,36 +322,16 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
return;
|
||||
}
|
||||
|
||||
/// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant.
|
||||
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||
{
|
||||
/// First, serialize discriminators.
|
||||
/// We know that all discriminators are the same, so we just need to serialize this discriminator limit times.
|
||||
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
|
||||
for (size_t i = 0; i != limit; ++i)
|
||||
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||
|
||||
/// Second, serialize non-empty variant (other variants are empty and we can skip their serialization).
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
addVariantElementToPath(settings.path, non_empty_global_discr);
|
||||
/// We can use the same offset/limit as for whole Variant column
|
||||
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]);
|
||||
variants_statistics[variant_names[non_empty_global_discr]] += limit;
|
||||
settings.path.pop_back();
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
|
||||
/// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant.
|
||||
const auto & local_discriminators = col.getLocalDiscriminators();
|
||||
const auto & offsets = col.getOffsets();
|
||||
std::vector<std::pair<size_t, size_t>> variant_offsets_and_limits(variants.size(), {0, 0});
|
||||
size_t end = offset + limit;
|
||||
size_t num_non_empty_variants_in_range = 0;
|
||||
ColumnVariant::Discriminator last_non_empty_variant_discr = 0;
|
||||
for (size_t i = offset; i < end; ++i)
|
||||
{
|
||||
auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]);
|
||||
writeBinaryLittleEndian(global_discr, *discriminators_stream);
|
||||
|
||||
if (global_discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
{
|
||||
/// If we see this discriminator for the first time, update offset
|
||||
@ -233,9 +339,38 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
variant_offsets_and_limits[global_discr].first = offsets[i];
|
||||
/// Update limit for this discriminator.
|
||||
++variant_offsets_and_limits[global_discr].second;
|
||||
++num_non_empty_variants_in_range;
|
||||
last_non_empty_variant_discr = global_discr;
|
||||
}
|
||||
}
|
||||
|
||||
/// In basic mode just serialize discriminators as is row by row.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC)
|
||||
{
|
||||
for (size_t i = offset; i < end; ++i)
|
||||
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
|
||||
}
|
||||
/// In compact mode check if we have the same discriminator for all rows in this granule.
|
||||
/// First, check if all values in granule are NULLs.
|
||||
else if (num_non_empty_variants_in_range == 0)
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
|
||||
}
|
||||
/// Then, check if there is only 1 variant and no NULLs in this granule.
|
||||
else if (num_non_empty_variants_in_range == 1 && variant_offsets_and_limits[last_non_empty_variant_discr].second == limit)
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||
writeBinaryLittleEndian(last_non_empty_variant_discr, *discriminators_stream);
|
||||
}
|
||||
/// Otherwise there are different discriminators in this granule.
|
||||
else
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream);
|
||||
for (size_t i = offset; i < end; ++i)
|
||||
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
|
||||
}
|
||||
|
||||
/// Serialize variants in global order.
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
@ -249,7 +384,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
variant_offsets_and_limits[i].first,
|
||||
variant_offsets_and_limits[i].second,
|
||||
settings,
|
||||
variant_state->states[i]);
|
||||
variant_state->variant_states[i]);
|
||||
variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second;
|
||||
settings.path.pop_back();
|
||||
}
|
||||
@ -284,39 +419,68 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
|
||||
|
||||
/// First, deserialize discriminators.
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
|
||||
DeserializeBinaryBulkStateVariant * variant_state = nullptr;
|
||||
std::vector<size_t> variant_limits;
|
||||
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
|
||||
{
|
||||
variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
|
||||
col.getLocalDiscriminatorsPtr() = cached_discriminators;
|
||||
}
|
||||
else
|
||||
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||
{
|
||||
auto * discriminators_stream = settings.getter(settings.path);
|
||||
if (!discriminators_stream)
|
||||
return;
|
||||
variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
|
||||
auto * discriminators_state = checkAndGetState<DeserializeBinaryBulkStateVariantDiscriminators>(variant_state->discriminators_state);
|
||||
|
||||
/// Deserialize discriminators according to serialization mode.
|
||||
if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC)
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
|
||||
else
|
||||
variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, *discriminators_state);
|
||||
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
|
||||
addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr());
|
||||
}
|
||||
/// It may happen that there is no such stream, in this case just do nothing.
|
||||
else
|
||||
{
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
|
||||
settings.path.pop_back();
|
||||
|
||||
/// Second, calculate limits for each variant by iterating through new discriminators.
|
||||
std::vector<size_t> variant_limits(variants.size(), 0);
|
||||
auto & discriminators_data = col.getLocalDiscriminators();
|
||||
size_t discriminators_offset = discriminators_data.size() - limit;
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
/// Second, calculate limits for each variant by iterating through new discriminators
|
||||
/// if we didn't do it during discriminators deserialization.
|
||||
if (variant_limits.empty())
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
++variant_limits[discr];
|
||||
variant_limits.resize(variants.size(), 0);
|
||||
auto & discriminators_data = col.getLocalDiscriminators();
|
||||
|
||||
/// We can actually read fewer than `limit` discriminators, and we cannot determine the actual number of read rows
/// from the discriminators column because it could have been taken from the substreams cache. We need the actual number
/// of read rows to fill offsets correctly later if they are not in the cache. We can determine whether the offsets column
/// is in the cache by comparing its size with the discriminators column size (they are equal when offsets are cached).
/// If offsets are not in the cache, we can use their size to determine the actual number of read rows.
|
||||
size_t num_new_discriminators = limit;
|
||||
size_t offsets_size = col.getOffsetsPtr()->size();
|
||||
if (discriminators_data.size() > offsets_size)
|
||||
num_new_discriminators = discriminators_data.size() - offsets_size;
|
||||
size_t discriminators_offset = discriminators_data.size() - num_new_discriminators;
|
||||
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
++variant_limits[discr];
|
||||
}
|
||||
}
|
||||
|
||||
/// Now we can deserialize variants according to their limits.
|
||||
auto * variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache);
|
||||
variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->variant_states[i], cache);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
settings.path.pop_back();
|
||||
@ -336,20 +500,49 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & offsets = col.getOffsets();
|
||||
offsets.reserve(offsets.size() + limit);
|
||||
std::vector<size_t> variant_offsets;
|
||||
variant_offsets.reserve(variants.size());
|
||||
size_t num_non_empty_variants = 0;
|
||||
ColumnVariant::Discriminator last_non_empty_discr = 0;
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);
|
||||
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
offsets.emplace_back();
|
||||
else
|
||||
offsets.push_back(variant_offsets[discr]++);
|
||||
if (variant_limits[i])
|
||||
{
|
||||
++num_non_empty_variants;
|
||||
last_non_empty_discr = i;
|
||||
}
|
||||
|
||||
variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);
|
||||
}
|
||||
|
||||
auto & discriminators_data = col.getLocalDiscriminators();
|
||||
auto & offsets = col.getOffsets();
|
||||
size_t num_new_offsets = discriminators_data.size() - offsets.size();
|
||||
offsets.reserve(offsets.size() + num_new_offsets);
|
||||
/// If only NULLs were read, fill offsets with 0.
|
||||
if (num_non_empty_variants == 0)
|
||||
{
|
||||
offsets.resize_fill(discriminators_data.size(), 0);
|
||||
}
|
||||
/// If only one variant and no NULLs were read, fill offsets with sequential offsets of this variant.
|
||||
else if (num_non_empty_variants == 1 && variant_limits[last_non_empty_discr] == num_new_offsets)
|
||||
{
|
||||
size_t first_offset = col.getVariantByLocalDiscriminator(last_non_empty_discr).size() - num_new_offsets;
|
||||
for (size_t i = 0; i != num_new_offsets; ++i)
|
||||
offsets.push_back(first_offset + i);
|
||||
}
|
||||
/// Otherwise iterate through discriminators and fill offsets accordingly.
|
||||
else
|
||||
{
|
||||
size_t start = offsets.size();
|
||||
for (size_t i = start; i != discriminators_data.size(); ++i)
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
offsets.emplace_back();
|
||||
else
|
||||
offsets.push_back(variant_offsets[discr]++);
|
||||
}
|
||||
}
|
||||
|
||||
addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr());
|
||||
@ -357,6 +550,72 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
std::vector<size_t> SerializationVariant::deserializeCompactDiscriminators(
|
||||
DB::ColumnPtr & discriminators_column,
|
||||
size_t limit,
|
||||
ReadBuffer * stream,
|
||||
bool continuous_reading,
|
||||
DeserializeBinaryBulkStateVariantDiscriminators & state) const
|
||||
{
|
||||
auto & discriminators = assert_cast<ColumnVariant::ColumnDiscriminators &>(*discriminators_column->assumeMutable());
|
||||
auto & discriminators_data = discriminators.getData();
|
||||
|
||||
/// Reset state if we are reading from the start of the granule and not from the previous position in the file.
|
||||
if (!continuous_reading)
|
||||
state.remaining_rows_in_granule = 0;
|
||||
|
||||
/// Calculate limits for variants during discriminators deserialization.
|
||||
std::vector<size_t> variant_limits(variants.size(), 0);
|
||||
while (limit)
|
||||
{
|
||||
/// If we read all rows from current granule, start reading the next one.
|
||||
if (state.remaining_rows_in_granule == 0)
|
||||
{
|
||||
if (stream->eof())
|
||||
return variant_limits;
|
||||
|
||||
readDiscriminatorsGranuleStart(state, stream);
|
||||
}
|
||||
|
||||
size_t limit_in_granule = std::min(limit, state.remaining_rows_in_granule);
|
||||
if (state.granule_format == CompactDiscriminatorsGranuleFormat::COMPACT)
|
||||
{
|
||||
auto & data = discriminators.getData();
|
||||
data.resize_fill(data.size() + limit_in_granule, state.compact_discr);
|
||||
if (state.compact_discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
variant_limits[state.compact_discr] += limit_in_granule;
|
||||
}
|
||||
else
|
||||
{
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0);
|
||||
size_t start = discriminators_data.size() - limit_in_granule;
|
||||
for (size_t i = start; i != discriminators_data.size(); ++i)
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
++variant_limits[discr];
|
||||
}
|
||||
}
|
||||
|
||||
state.remaining_rows_in_granule -= limit_in_granule;
|
||||
limit -= limit_in_granule;
|
||||
}
|
||||
|
||||
return variant_limits;
|
||||
}
|
||||
|
||||
void SerializationVariant::readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, DB::ReadBuffer * stream)
|
||||
{
|
||||
UInt64 granule_size;
|
||||
readVarUInt(granule_size, *stream);
|
||||
state.remaining_rows_in_granule = granule_size;
|
||||
UInt8 granule_format;
|
||||
readBinaryLittleEndian(granule_format, *stream);
|
||||
state.granule_format = static_cast<CompactDiscriminatorsGranuleFormat>(granule_format);
|
||||
if (granule_format == CompactDiscriminatorsGranuleFormat::COMPACT)
|
||||
readBinaryLittleEndian(state.compact_discr, *stream);
|
||||
}
|
||||
|
||||
void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const
|
||||
{
|
||||
path.push_back(Substream::VariantElement);
|
||||
|
@ -2,10 +2,18 @@

#include <DataTypes/Serializations/ISerialization.h>
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>

namespace DB
{


namespace ErrorCodes
{
    extern const int INCORRECT_DATA;
}


/// Class for serializing/deserializing column with Variant type.
/// It supports both text and binary bulk serializations/deserializations.
///
@ -18,6 +26,17 @@ namespace DB
///
/// During binary bulk serialization it transforms local discriminators
/// to global and serializes them into a separate stream VariantDiscriminators.
/// There are 2 modes of serialising discriminators:
/// Basic mode, when all discriminators are serialized as is row by row.
/// Compact mode, when we avoid writing the same discriminators in granules when there is
/// only one variant (or only NULLs) in the granule.
/// In compact mode we serialize granules in the following format:
/// <number of rows in granule><granule format><granule data>
/// There are 2 different formats of granule - plain and compact.
/// Plain format is used when there are different discriminators in this granule,
/// in this format all discriminators are serialized as is row by row.
/// Compact format is used when all discriminators are the same in this granule,
/// in this case only this single discriminator is serialized.
/// Each variant is serialized into a separate stream with path VariantElements/VariantElement
/// (VariantElements stream is needed for correct sub-columns creation). We store and serialize
/// variants in a sparse form (the size of a variant column equals to the number of its discriminator
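The granule layout described above can be illustrated with a short standalone sketch (not the ClickHouse serialization; the varint row count is simplified here to a fixed-width integer): if every row in the granule has the same discriminator, only that single value is written, otherwise the discriminators are written row by row.

#include <cstdint>
#include <iostream>
#include <vector>

enum GranuleFormat : uint8_t { PLAIN = 0, COMPACT = 1 };

std::vector<uint8_t> encodeGranule(const std::vector<uint8_t> & discriminators)
{
    std::vector<uint8_t> out;
    uint64_t rows = discriminators.size();
    for (int i = 0; i < 8; ++i)                     // simplified fixed-width row count (real stream uses writeVarUInt)
        out.push_back(static_cast<uint8_t>(rows >> (8 * i)));

    bool all_same = true;
    for (auto d : discriminators)
        all_same &= (d == discriminators.front());

    if (!discriminators.empty() && all_same)
    {
        out.push_back(COMPACT);
        out.push_back(discriminators.front());      // single discriminator for the whole granule
    }
    else
    {
        out.push_back(PLAIN);
        out.insert(out.end(), discriminators.begin(), discriminators.end());   // row by row
    }
    return out;
}

int main()
{
    std::cout << encodeGranule({3, 3, 3, 3}).size() << "\n";   // 8 + 1 + 1 = 10 bytes
    std::cout << encodeGranule({3, 0, 3, 1}).size() << "\n";   // 8 + 1 + 4 = 13 bytes
}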
@ -32,6 +51,25 @@ namespace DB
|
||||
class SerializationVariant : public ISerialization
|
||||
{
|
||||
public:
|
||||
struct DiscriminatorsSerializationMode
|
||||
{
|
||||
enum Value
|
||||
{
|
||||
BASIC = 0, /// Store the whole discriminators column.
|
||||
COMPACT = 1, /// Don't write discriminators in granule if all of them are the same.
|
||||
};
|
||||
|
||||
static void checkMode(UInt64 mode)
|
||||
{
|
||||
if (mode > Value::COMPACT)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationVariant discriminators column.");
|
||||
}
|
||||
|
||||
explicit DiscriminatorsSerializationMode(UInt64 mode) : value(static_cast<Value>(mode)) { checkMode(mode); }
|
||||
|
||||
Value value;
|
||||
};
|
||||
|
||||
using VariantSerializations = std::vector<SerializationPtr>;
|
||||
|
||||
explicit SerializationVariant(
|
||||
@ -123,8 +161,44 @@ public:
|
||||
static std::vector<size_t> getVariantsDeserializeTextOrder(const DataTypes & variant_types);
|
||||
|
||||
private:
|
||||
friend SerializationVariantElement;
|
||||
friend SerializationVariantElementNullMap;
|
||||
|
||||
void addVariantElementToPath(SubstreamPath & path, size_t i) const;
|
||||
|
||||
enum CompactDiscriminatorsGranuleFormat
|
||||
{
|
||||
PLAIN = 0, /// Granule has different discriminators and they are serialized as is row by row.
|
||||
COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value.
|
||||
};
|
||||
|
||||
struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState
|
||||
{
|
||||
explicit DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_)
|
||||
{
|
||||
}
|
||||
|
||||
DiscriminatorsSerializationMode mode;
|
||||
|
||||
/// Deserialize state of currently read granule in compact mode.
|
||||
CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN;
|
||||
size_t remaining_rows_in_granule = 0;
|
||||
ColumnVariant::Discriminator compact_discr = 0;
|
||||
};
|
||||
|
||||
static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
SubstreamsDeserializeStatesCache * cache);
|
||||
|
||||
std::vector<size_t> deserializeCompactDiscriminators(
|
||||
ColumnPtr & discriminators_column,
|
||||
size_t limit,
|
||||
ReadBuffer * stream,
|
||||
bool continuous_reading,
|
||||
DeserializeBinaryBulkStateVariantDiscriminators & state) const;
|
||||
|
||||
static void readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, ReadBuffer * stream);
|
||||
|
||||
bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||
bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||
bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||
|
@ -1,5 +1,6 @@
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/Serializations/SerializationVariant.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnNullable.h>
#include <IO/ReadHelpers.h>
@ -12,7 +13,7 @@ namespace ErrorCodes
    extern const int NOT_IMPLEMENTED;
}

struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
{
    /// During deserialization discriminators and variant streams can be shared.
    /// For example we can read several variant elements together: "select v.UInt32, v.String from table",
@ -24,7 +25,7 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria
    /// substream cache correctly.
    ColumnPtr discriminators;
    ColumnPtr variant;

    ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
    ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
};

@ -65,7 +66,12 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary
void SerializationVariantElement::deserializeBinaryBulkStatePrefix(
    DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const
{
    DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache);
    if (!discriminators_state)
        return;

    auto variant_element_state = std::make_shared<DeserializeBinaryBulkStateVariantElement>();
    variant_element_state->discriminators_state = discriminators_state;

    addVariantToPath(settings.path);
    nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache);
@ -86,35 +92,61 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
    DeserializeBinaryBulkStatePtr & state,
    SubstreamsCache * cache) const
{
    auto * variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);

    /// First, deserialize discriminators from Variant column.
    settings.path.push_back(Substream::VariantDiscriminators);

    DeserializeBinaryBulkStateVariantElement * variant_element_state = nullptr;
    std::optional<size_t> variant_limit;
    if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
    {
        variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
        variant_element_state->discriminators = cached_discriminators;
    }
    else
    else if (auto * discriminators_stream = settings.getter(settings.path))
    {
        auto * discriminators_stream = settings.getter(settings.path);
        if (!discriminators_stream)
            return;
        variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
        auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(variant_element_state->discriminators_state);

        /// If we started to read a new column, reinitialize discriminators column in deserialization state.
        if (!variant_element_state->discriminators || result_column->empty())
            variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create();

        SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
        /// Deserialize discriminators according to serialization mode.
        if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC)
            SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
        else
            variant_limit = deserializeCompactDiscriminators(
                variant_element_state->discriminators,
                variant_discriminator,
                limit,
                discriminators_stream,
                settings.continuous_reading,
                variant_element_state->discriminators_state,
                this);

        addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators);
    }
    else
    {
        settings.path.pop_back();
        return;
    }

    settings.path.pop_back();

    /// Iterate through new discriminators to calculate the limit for our variant.
    /// We could read less than limit discriminators, but we will need actual number of read rows later.
    size_t num_new_discriminators = variant_element_state->discriminators->size() - result_column->size();

    /// Iterate through new discriminators to calculate the limit for our variant
    /// if we didn't do it during discriminators deserialization.
    const auto & discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_state->discriminators).getData();
    size_t discriminators_offset = variant_element_state->discriminators->size() - limit;
    size_t variant_limit = 0;
    for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
        variant_limit += (discriminators_data[i] == variant_discriminator);
    size_t discriminators_offset = variant_element_state->discriminators->size() - num_new_discriminators;
    if (!variant_limit)
    {
        variant_limit = 0;
        for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
            *variant_limit += (discriminators_data[i] == variant_discriminator);
    }

    /// Now we know the limit for our variant and can deserialize it.

@ -125,19 +157,19 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
        auto & nullable_column = assert_cast<ColumnNullable &>(*mutable_column);
        NullMap & null_map = nullable_column.getNullMapData();
        /// If we have only our discriminator in range, fill null map with 0.
        if (variant_limit == limit)
        if (variant_limit == num_new_discriminators)
        {
            null_map.resize_fill(null_map.size() + limit, 0);
            null_map.resize_fill(null_map.size() + num_new_discriminators, 0);
        }
        /// If no our discriminator in current range, fill null map with 1.
        else if (variant_limit == 0)
        {
            null_map.resize_fill(null_map.size() + limit, 1);
            null_map.resize_fill(null_map.size() + num_new_discriminators, 1);
        }
        /// Otherwise we should iterate through discriminators to fill null map.
        else
        {
            null_map.reserve(null_map.size() + limit);
            null_map.reserve(null_map.size() + num_new_discriminators);
            for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
                null_map.push_back(discriminators_data[i] != variant_discriminator);
        }
@ -159,12 +191,12 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
    /// If nothing to deserialize, just insert defaults.
    if (variant_limit == 0)
    {
        mutable_column->insertManyDefaults(limit);
        mutable_column->insertManyDefaults(num_new_discriminators);
        return;
    }

    addVariantToPath(settings.path);
    nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache);
    nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache);
    removeVariantFromPath(settings.path);

    /// If nothing was deserialized when variant_limit > 0
@ -173,16 +205,16 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
    /// In this case we should just insert default values.
    if (variant_element_state->variant->empty())
    {
        mutable_column->insertManyDefaults(limit);
        mutable_column->insertManyDefaults(num_new_discriminators);
        return;
    }

    size_t variant_offset = variant_element_state->variant->size() - variant_limit;
    size_t variant_offset = variant_element_state->variant->size() - *variant_limit;

    /// If we have only our discriminator in range, insert the whole range to result column.
    if (variant_limit == limit)
    if (variant_limit == num_new_discriminators)
    {
        mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit);
        mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, *variant_limit);
    }
    /// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator.
    else
@ -197,6 +229,59 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
    }
}

size_t SerializationVariantElement::deserializeCompactDiscriminators(
    DB::ColumnPtr & discriminators_column,
    ColumnVariant::Discriminator variant_discriminator,
    size_t limit,
    DB::ReadBuffer * stream,
    bool continuous_reading,
    DeserializeBinaryBulkStatePtr & discriminators_state_,
    const ISerialization * serialization)
{
    auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(discriminators_state_, serialization);
    auto & discriminators = assert_cast<ColumnVariant::ColumnDiscriminators &>(*discriminators_column->assumeMutable());
    auto & discriminators_data = discriminators.getData();

    /// Reset state if we are reading from the start of the granule and not from the previous position in the file.
    if (!continuous_reading)
        discriminators_state->remaining_rows_in_granule = 0;

    /// Calculate our variant limit during discriminators deserialization.
    size_t variant_limit = 0;
    while (limit)
    {
        /// If we read all rows from current granule, start reading the next one.
        if (discriminators_state->remaining_rows_in_granule == 0)
        {
            if (stream->eof())
                return variant_limit;

            SerializationVariant::readDiscriminatorsGranuleStart(*discriminators_state, stream);
        }

        size_t limit_in_granule = std::min(limit, discriminators_state->remaining_rows_in_granule);
        if (discriminators_state->granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT)
        {
            auto & data = discriminators.getData();
            data.resize_fill(data.size() + limit_in_granule, discriminators_state->compact_discr);
            if (discriminators_state->compact_discr == variant_discriminator)
                variant_limit += limit_in_granule;
        }
        else
        {
            SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0);
            size_t start = discriminators_data.size() - limit_in_granule;
            for (size_t i = start; i != discriminators_data.size(); ++i)
                variant_limit += (discriminators_data[i] == variant_discriminator);
        }

        discriminators_state->remaining_rows_in_granule -= limit_in_granule;
        limit -= limit_in_granule;
    }

    return variant_limit;
}

void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const
{
    path.push_back(Substream::VariantElements);
@ -214,17 +299,19 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator(
    const ColumnPtr & local_discriminators_,
    const String & variant_element_name_,
    ColumnVariant::Discriminator global_variant_discriminator_,
    ColumnVariant::Discriminator local_variant_discriminator_)
    ColumnVariant::Discriminator local_variant_discriminator_,
    bool make_nullable_)
    : local_discriminators(local_discriminators_)
    , variant_element_name(variant_element_name_)
    , global_variant_discriminator(global_variant_discriminator_)
    , local_variant_discriminator(local_variant_discriminator_)
    , make_nullable(make_nullable_)
{
}

DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const
{
    return makeNullableOrLowCardinalityNullableSafe(prev);
    return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev;
}

SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const
@ -237,12 +324,12 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB:
    /// Case when original Variant column contained only one non-empty variant and no NULLs.
    /// In this case just use this variant.
    if (prev->size() == local_discriminators->size())
        return makeNullableOrLowCardinalityNullableSafe(prev);
        return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev;

    /// If this variant is empty, fill result column with default values.
    if (prev->empty())
    {
        auto res = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty();
        auto res = make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty() : prev->cloneEmpty();
        res->insertManyDefaults(local_discriminators->size());
        return res;
    }
@ -257,16 +344,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB:
    /// Now we can create new column from null-map and variant column using IColumn::expand.
    auto res_column = IColumn::mutate(prev);

    /// Special case for LowCardinality. We want the result to be LowCardinality(Nullable),
    /// Special case for LowCardinality when we want the result to be LowCardinality(Nullable),
    /// but we don't have a good way to apply null-mask for LowCardinality(), so, we first
    /// convert our column to LowCardinality(Nullable()) and then use expand which will
    /// fill rows with 0 in mask with default value (that is NULL).
    if (prev->lowCardinality())
    if (make_nullable && prev->lowCardinality())
        res_column = assert_cast<ColumnLowCardinality &>(*res_column).cloneNullable();

    res_column->expand(null_map, /*inverted = */ true);

    if (res_column->canBeInsideNullable())
    if (make_nullable && prev->canBeInsideNullable())
    {
        auto null_map_col = ColumnUInt8::create();
        null_map_col->getData() = std::move(null_map);
@ -9,6 +9,7 @@ namespace DB
{

class SerializationVariant;
class SerializationVariantElementNullMap;

/// Serialization for Variant element when we read it as a subcolumn.
class SerializationVariantElement final : public SerializationWrapper
@ -66,12 +67,14 @@ public:
        const String variant_element_name;
        const ColumnVariant::Discriminator global_variant_discriminator;
        const ColumnVariant::Discriminator local_variant_discriminator;
        bool make_nullable;

        VariantSubcolumnCreator(
            const ColumnPtr & local_discriminators_,
            const String & variant_element_name_,
            ColumnVariant::Discriminator global_variant_discriminator_,
            ColumnVariant::Discriminator local_variant_discriminator_);
            ColumnVariant::Discriminator local_variant_discriminator_,
            bool make_nullable_);

        DataTypePtr create(const DataTypePtr & prev) const override;
        ColumnPtr create(const ColumnPtr & prev) const override;
@ -79,6 +82,18 @@ public:
    };
private:
    friend SerializationVariant;
    friend SerializationVariantElementNullMap;

    struct DeserializeBinaryBulkStateVariantElement;

    static size_t deserializeCompactDiscriminators(
        ColumnPtr & discriminators_column,
        ColumnVariant::Discriminator variant_discriminator,
        size_t limit,
        ReadBuffer * stream,
        bool continuous_reading,
        DeserializeBinaryBulkStatePtr & discriminators_state_,
        const ISerialization * serialization);

    void addVariantToPath(SubstreamPath & path) const;
    void removeVariantFromPath(SubstreamPath & path) const;
@ -0,0 +1,190 @@
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/Serializations/SerializationNumber.h>
|
||||
#include <DataTypes/Serializations/SerializationVariant.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
struct DeserializeBinaryBulkStateVariantElementNullMap : public ISerialization::DeserializeBinaryBulkState
|
||||
{
|
||||
/// During deserialization discriminators streams can be shared.
|
||||
/// For example we can read several variant elements together: "select v.UInt32, v.String.null from table",
|
||||
/// or we can read the whole variant and some of variant elements or their subcolumns: "select v, v.UInt32.null from table".
|
||||
/// To read the same column from the same stream more than once we use substream cache,
|
||||
/// but this cache stores the whole column, not only the current range.
|
||||
/// During deserialization of variant elements or their subcolumns discriminators column is not stored
|
||||
/// in the result column, so we need to store them inside deserialization state, so we can use
|
||||
/// substream cache correctly.
|
||||
ColumnPtr discriminators;
|
||||
ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
|
||||
};
|
||||
|
||||
void SerializationVariantElementNullMap::enumerateStreams(
|
||||
DB::ISerialization::EnumerateStreamsSettings & settings,
|
||||
const DB::ISerialization::StreamCallback & callback,
|
||||
const DB::ISerialization::SubstreamData &) const
|
||||
{
|
||||
/// We will need stream for discriminators during deserialization.
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
callback(settings.path);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::serializeBinaryBulkStatePrefix(
|
||||
const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElementNullMap");
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElementNullMap");
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const
|
||||
{
|
||||
DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache);
|
||||
if (!discriminators_state)
|
||||
return;
|
||||
|
||||
auto variant_element_null_map_state = std::make_shared<DeserializeBinaryBulkStateVariantElementNullMap>();
|
||||
variant_element_null_map_state->discriminators_state = std::move(discriminators_state);
|
||||
state = std::move(variant_element_null_map_state);
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElementNullMap");
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & result_column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
/// Deserialize discriminators from Variant column.
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
|
||||
DeserializeBinaryBulkStateVariantElementNullMap * variant_element_null_map_state = nullptr;
|
||||
std::optional<size_t> variant_limit;
|
||||
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
|
||||
{
|
||||
variant_element_null_map_state = checkAndGetState<DeserializeBinaryBulkStateVariantElementNullMap>(state);
|
||||
variant_element_null_map_state->discriminators = cached_discriminators;
|
||||
}
|
||||
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||
{
|
||||
variant_element_null_map_state = checkAndGetState<DeserializeBinaryBulkStateVariantElementNullMap>(state);
|
||||
auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(
|
||||
variant_element_null_map_state->discriminators_state);
|
||||
|
||||
/// If we started to read a new column, reinitialize discriminators column in deserialization state.
|
||||
if (!variant_element_null_map_state->discriminators || result_column->empty())
|
||||
variant_element_null_map_state->discriminators = ColumnVariant::ColumnDiscriminators::create();
|
||||
|
||||
/// Deserialize discriminators according to serialization mode.
|
||||
if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC)
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(
|
||||
*variant_element_null_map_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
|
||||
else
|
||||
variant_limit = SerializationVariantElement::deserializeCompactDiscriminators(
|
||||
variant_element_null_map_state->discriminators,
|
||||
variant_discriminator,
|
||||
limit,
|
||||
discriminators_stream,
|
||||
settings.continuous_reading,
|
||||
variant_element_null_map_state->discriminators_state,
|
||||
this);
|
||||
|
||||
addToSubstreamsCache(cache, settings.path, variant_element_null_map_state->discriminators);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// There is no such stream or cached data, it means that there is no Variant column in this part (it could happen after alter table add column).
|
||||
/// In such cases columns are filled with default values, but for null-map column default value should be 1, not 0. Fill column with 1 here instead.
|
||||
MutableColumnPtr mutable_column = result_column->assumeMutable();
|
||||
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
|
||||
data.resize_fill(data.size() + limit, 1);
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
settings.path.pop_back();
|
||||
|
||||
MutableColumnPtr mutable_column = result_column->assumeMutable();
|
||||
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
|
||||
/// Check if there are no such variant in read range.
|
||||
if (variant_limit && *variant_limit == 0)
|
||||
{
|
||||
data.resize_fill(data.size() + limit, 1);
|
||||
}
|
||||
/// Check if there is only our variant in read range.
|
||||
else if (variant_limit && *variant_limit == limit)
|
||||
{
|
||||
data.resize_fill(data.size() + limit, 0);
|
||||
}
|
||||
/// Iterate through new discriminators to calculate the null map of our variant.
|
||||
else
|
||||
{
|
||||
const auto & discriminators_data
|
||||
= assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_null_map_state->discriminators).getData();
|
||||
size_t discriminators_offset = variant_element_null_map_state->discriminators->size() - limit;
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
data.push_back(discriminators_data[i] != variant_discriminator);
|
||||
}
|
||||
}
|
||||
|
||||
SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::VariantNullMapSubcolumnCreator(
|
||||
const ColumnPtr & local_discriminators_,
|
||||
const String & variant_element_name_,
|
||||
ColumnVariant::Discriminator global_variant_discriminator_,
|
||||
ColumnVariant::Discriminator local_variant_discriminator_)
|
||||
: local_discriminators(local_discriminators_)
|
||||
, variant_element_name(variant_element_name_)
|
||||
, global_variant_discriminator(global_variant_discriminator_)
|
||||
, local_variant_discriminator(local_variant_discriminator_)
|
||||
{
|
||||
}
|
||||
|
||||
DataTypePtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::DataTypePtr &) const
|
||||
{
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
SerializationPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::SerializationPtr &) const
|
||||
{
|
||||
return std::make_shared<SerializationVariantElementNullMap>(variant_element_name, global_variant_discriminator);
|
||||
}
|
||||
|
||||
ColumnPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::ColumnPtr &) const
|
||||
{
|
||||
/// Iterate through discriminators and create null-map for our variant.
|
||||
auto null_map_col = ColumnUInt8::create();
|
||||
auto & null_map_data = null_map_col->getData();
|
||||
null_map_data.reserve(local_discriminators->size());
|
||||
const auto & local_discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*local_discriminators).getData();
|
||||
for (auto local_discr : local_discriminators_data)
|
||||
null_map_data.push_back(local_discr != local_variant_discriminator);
|
||||
|
||||
return null_map_col;
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,107 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataTypes/Serializations/SimpleTextSerialization.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnVariant.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
class SerializationVariant;
|
||||
class SerializationVariantElement;
|
||||
|
||||
/// Serialization for Variant element null map when we read it as a subcolumn.
|
||||
/// For example, variant.UInt64.null.
|
||||
/// It requires separate serialization because there is no actual Nullable column
|
||||
/// and we should construct null map from variant discriminators.
|
||||
/// The implementation of deserializeBinaryBulk* methods is similar to SerializationVariantElement,
|
||||
/// but differs in that there is no need to read the actual data of the variant, only discriminators.
|
||||
class SerializationVariantElementNullMap final : public SimpleTextSerialization
|
||||
{
|
||||
public:
|
||||
SerializationVariantElementNullMap(const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_)
|
||||
: variant_element_name(variant_element_name_), variant_discriminator(variant_discriminator_)
|
||||
{
|
||||
}
|
||||
|
||||
void enumerateStreams(
|
||||
EnumerateStreamsSettings & settings,
|
||||
const StreamCallback & callback,
|
||||
const SubstreamData & data) const override;
|
||||
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void serializeBinaryBulkStateSuffix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsDeserializeStatesCache * cache) const override;
|
||||
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
|
||||
void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||
void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||
void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||
void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||
void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||
void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); }
|
||||
bool tryDeserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); }
|
||||
|
||||
struct VariantNullMapSubcolumnCreator : public ISubcolumnCreator
|
||||
{
|
||||
const ColumnPtr local_discriminators;
|
||||
const String variant_element_name;
|
||||
const ColumnVariant::Discriminator global_variant_discriminator;
|
||||
const ColumnVariant::Discriminator local_variant_discriminator;
|
||||
|
||||
VariantNullMapSubcolumnCreator(
|
||||
const ColumnPtr & local_discriminators_,
|
||||
const String & variant_element_name_,
|
||||
ColumnVariant::Discriminator global_variant_discriminator_,
|
||||
ColumnVariant::Discriminator local_variant_discriminator_);
|
||||
|
||||
DataTypePtr create(const DataTypePtr & prev) const override;
|
||||
ColumnPtr create(const ColumnPtr & prev) const override;
|
||||
SerializationPtr create(const SerializationPtr & prev) const override;
|
||||
};
|
||||
private:
|
||||
[[noreturn]] static void throwNoSerialization()
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for variant element null map subcolumn");
|
||||
}
|
||||
|
||||
friend SerializationVariant;
|
||||
friend SerializationVariantElement;
|
||||
|
||||
/// To be able to deserialize Variant element null map as a subcolumn
|
||||
/// we need variant element type name and global discriminator.
|
||||
String variant_element_name;
|
||||
ColumnVariant::Discriminator variant_discriminator;
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -3,14 +3,14 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Common/BinStringDecodeHelper.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Common/BinStringDecodeHelper.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -218,10 +218,7 @@ struct UnbinImpl
|
||||
static constexpr auto name = "unbin";
|
||||
static constexpr size_t word_size = 8;
|
||||
|
||||
static void decode(const char * pos, const char * end, char *& out)
|
||||
{
|
||||
binStringDecode(pos, end, out);
|
||||
}
|
||||
static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out, word_size); }
|
||||
};
|
||||
|
||||
/// Encode number or string to string with binary or hexadecimal representation
|
||||
@ -651,7 +648,15 @@ public:
|
||||
|
||||
size_t size = in_offsets.size();
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(in_vec.size() / word_size + size);
|
||||
|
||||
size_t max_out_len = 0;
|
||||
for (size_t i = 0; i < in_offsets.size(); ++i)
|
||||
{
|
||||
const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1])
|
||||
- /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1;
|
||||
max_out_len += (len + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1;
|
||||
}
|
||||
out_vec.resize(max_out_len);
|
||||
|
||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||
char * pos = begin;
|
||||
@ -661,6 +666,7 @@ public:
|
||||
{
|
||||
size_t new_offset = in_offsets[i];
|
||||
|
||||
/// `new_offset - 1` because in ColumnString each string is stored with trailing zero byte
|
||||
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset - 1]), pos);
|
||||
|
||||
out_offsets[i] = pos - begin;
|
||||
@ -668,6 +674,9 @@ public:
|
||||
prev_offset = new_offset;
|
||||
}
|
||||
|
||||
chassert(
|
||||
static_cast<size_t>(pos - begin) <= out_vec.size(),
|
||||
fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size()));
|
||||
out_vec.resize(pos - begin);
|
||||
|
||||
return col_res;
|
||||
@ -680,11 +689,11 @@ public:
|
||||
ColumnString::Offsets & out_offsets = col_res->getOffsets();
|
||||
|
||||
const ColumnString::Chars & in_vec = col_fix_string->getChars();
|
||||
size_t n = col_fix_string->getN();
|
||||
const size_t n = col_fix_string->getN();
|
||||
|
||||
size_t size = col_fix_string->size();
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(in_vec.size() / word_size + size);
|
||||
out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size);
|
||||
|
||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||
char * pos = begin;
|
||||
@ -694,6 +703,7 @@ public:
|
||||
{
|
||||
size_t new_offset = prev_offset + n;
|
||||
|
||||
/// here we don't subtract 1 from `new_offset` because in ColumnFixedString strings are stored without trailing zero byte
|
||||
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset]), pos);
|
||||
|
||||
out_offsets[i] = pos - begin;
|
||||
@ -701,6 +711,9 @@ public:
|
||||
prev_offset = new_offset;
|
||||
}
|
||||
|
||||
chassert(
|
||||
static_cast<size_t>(pos - begin) <= out_vec.size(),
|
||||
fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size()));
|
||||
out_vec.resize(pos - begin);
|
||||
|
||||
return col_res;
|
||||
|
@ -44,7 +44,7 @@ struct Memory : boost::noncopyable, Allocator
    char * m_data = nullptr;
    size_t alignment = 0;

    [[maybe_unused]] bool allow_gwp_asan_force_sample;
    [[maybe_unused]] bool allow_gwp_asan_force_sample{false};

    Memory() = default;
@ -301,13 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const
    auto & insert_query = query->as<ASTInsertQuery &>();
    insert_query.async_insert_flush = true;

    InterpreterInsertQuery interpreter(
        query,
        query_context,
        query_context->getSettingsRef().insert_allow_materialized_columns,
        /* no_squash */ false,
        /* no_destination */ false,
        /* async_insert */ false);
    InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns);
    auto table = interpreter.getTable(insert_query);
    auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context);

@ -787,12 +781,7 @@ try
    try
    {
        interpreter = std::make_unique<InterpreterInsertQuery>(
            key.query,
            insert_context,
            key.settings.insert_allow_materialized_columns,
            false,
            false,
            true);
            key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true);

        pipeline = interpreter->execute().pipeline;
        chassert(pipeline.pushing());
@ -1011,7 +1000,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
    }

    Chunk chunk(executor.getResultColumns(), total_rows);
    chunk.getChunkInfos().add(std::move(chunk_info));
    chunk.setChunkInfo(std::move(chunk_info));
    return chunk;
}

@ -1063,7 +1052,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries(
    }

    Chunk chunk(std::move(result_columns), total_rows);
    chunk.getChunkInfos().add(std::move(chunk_info));
    chunk.setChunkInfo(std::move(chunk_info));
    return chunk;
}
@ -2,7 +2,6 @@
#include <Interpreters/InterpreterFactory.h>

#include <algorithm>
#include <memory>

#include <Access/Common/AccessFlags.h>

@ -23,7 +22,6 @@
#include <Parsers/ASTCheckQuery.h>
#include <Parsers/ASTSetQuery.h>

#include <Processors/Chunk.h>
#include <Processors/IAccumulatingTransform.h>
#include <Processors/IInflatingTransform.h>
#include <Processors/ISimpleTransform.h>
@ -93,7 +91,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con
    return Chunk(std::move(columns), 1);
}

class TableCheckTask : public ChunkInfoCloneable<TableCheckTask>
class TableCheckTask : public ChunkInfo
{
public:
    TableCheckTask(StorageID table_id, const std::variant<std::monostate, ASTPtr, String> & partition_or_part, ContextPtr context)
@ -112,12 +110,6 @@ public:
        context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID());
    }

    TableCheckTask(const TableCheckTask & other)
        : table(other.table)
        , check_data_tasks(other.check_data_tasks)
        , is_finished(other.is_finished.load())
    {}

    std::optional<CheckResult> checkNext() const
    {
        if (isFinished())
@ -129,8 +121,8 @@ public:
            std::this_thread::sleep_for(sleep_time);
        });

        IStorage::DataValidationTasksPtr tmp = check_data_tasks;
        auto result = table->checkDataNext(tmp);
        IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks;
        auto result = table->checkDataNext(check_data_tasks_);
        is_finished = !result.has_value();
        return result;
    }
@ -188,7 +180,7 @@ protected:
        /// source should return at least one row to start pipeline
        result.addColumn(ColumnUInt8::create(1, 1));
        /// actual data stored in chunk info
        result.getChunkInfos().add(std::move(current_check_task));
        result.setChunkInfo(std::move(current_check_task));
        return result;
    }

@ -288,7 +280,7 @@ public:
protected:
    void transform(Chunk & chunk) override
    {
        auto table_check_task = chunk.getChunkInfos().get<TableCheckTask>();
        auto table_check_task = std::dynamic_pointer_cast<const TableCheckTask>(chunk.getChunkInfo());
        auto check_result = table_check_task->checkNext();
        if (!check_result)
        {
@ -1776,13 +1776,8 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create)
    else
        insert->select = create.select->clone();

    return InterpreterInsertQuery(
        insert,
        getContext(),
        getContext()->getSettingsRef().insert_allow_materialized_columns,
        /* no_squash */ false,
        /* no_destination */ false,
        /* async_isnert */ false).execute();
    return InterpreterInsertQuery(insert, getContext(),
        getContext()->getSettingsRef().insert_allow_materialized_columns).execute();
}

    return {};
@ -534,13 +534,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
    }
    else if (dynamic_cast<const ASTInsertQuery *>(ast.getExplainedQuery().get()))
    {
        InterpreterInsertQuery insert(
            ast.getExplainedQuery(),
            getContext(),
            /* allow_materialized */ false,
            /* no_squash */ false,
            /* no_destination */ false,
            /* async_isnert */ false);
        InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext());
        auto io = insert.execute();
        printPipeline(io.pipeline.getProcessors(), buf);
    }
@ -16,7 +16,6 @@
|
||||
#include <Interpreters/getTableExpressions.h>
|
||||
#include <Interpreters/processColumnTransformers.h>
|
||||
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
@ -27,7 +26,6 @@
|
||||
#include <Processors/Transforms/CountingTransform.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/Transforms/MaterializingTransform.h>
|
||||
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
|
||||
#include <Processors/Transforms/SquashingTransform.h>
|
||||
#include <Processors/Transforms/PlanSquashingTransform.h>
|
||||
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
|
||||
@ -40,7 +38,6 @@
|
||||
#include <Common/ThreadStatus.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include "base/defines.h"
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -397,358 +394,28 @@ Chain InterpreterInsertQuery::buildPreSinkChain(
|
||||
return out;
|
||||
}
|
||||
|
||||
std::pair<std::vector<Chain>, std::vector<Chain>> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block)
|
||||
{
|
||||
chassert(presink_streams > 0);
|
||||
chassert(sink_streams > 0);
|
||||
|
||||
ThreadGroupPtr running_group;
|
||||
if (current_thread)
|
||||
running_group = current_thread->getThreadGroup();
|
||||
if (!running_group)
|
||||
running_group = std::make_shared<ThreadGroup>(getContext());
|
||||
|
||||
std::vector<Chain> sink_chains;
|
||||
std::vector<Chain> presink_chains;
|
||||
|
||||
for (size_t i = 0; i < sink_streams; ++i)
|
||||
{
|
||||
auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr,
|
||||
running_group, /* elapsed_counter_ms= */ nullptr);
|
||||
|
||||
sink_chains.emplace_back(std::move(out));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < presink_streams; ++i)
|
||||
{
|
||||
auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block);
|
||||
presink_chains.emplace_back(std::move(out));
|
||||
}
|
||||
|
||||
return {std::move(presink_chains), std::move(sink_chains)};
|
||||
}
|
||||
|
||||
|
||||
QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table)
|
||||
{
|
||||
const Settings & settings = getContext()->getSettingsRef();
|
||||
|
||||
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
||||
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
||||
|
||||
bool is_trivial_insert_select = false;
|
||||
|
||||
if (settings.optimize_trivial_insert_select)
|
||||
{
|
||||
const auto & select_query = query.select->as<ASTSelectWithUnionQuery &>();
|
||||
const auto & selects = select_query.list_of_selects->children;
|
||||
const auto & union_modes = select_query.list_of_modes;
|
||||
|
||||
/// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries
|
||||
const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; };
|
||||
|
||||
is_trivial_insert_select =
|
||||
std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all))
|
||||
&& std::all_of(selects.begin(), selects.end(), isTrivialSelect);
|
||||
}
|
||||
|
||||
ContextPtr select_context = getContext();
|
||||
|
||||
if (is_trivial_insert_select)
|
||||
{
|
||||
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
|
||||
* don't need to process SELECT with more than max_insert_threads
|
||||
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
|
||||
* to avoid unnecessary squashing.
|
||||
*/
|
||||
|
||||
Settings new_settings = select_context->getSettings();
|
||||
|
||||
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
|
||||
|
||||
if (table->prefersLargeBlocks())
|
||||
{
|
||||
if (settings.min_insert_block_size_rows)
|
||||
new_settings.max_block_size = settings.min_insert_block_size_rows;
|
||||
if (settings.min_insert_block_size_bytes)
|
||||
new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes;
|
||||
}
|
||||
|
||||
auto context_for_trivial_select = Context::createCopy(context);
|
||||
context_for_trivial_select->setSettings(new_settings);
|
||||
context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames());
|
||||
|
||||
select_context = context_for_trivial_select;
|
||||
}
|
||||
|
||||
QueryPipelineBuilder pipeline;
|
||||
|
||||
{
|
||||
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
|
||||
|
||||
if (settings.allow_experimental_analyzer)
|
||||
{
|
||||
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options);
|
||||
pipeline = interpreter_select_analyzer.buildQueryPipeline();
|
||||
}
|
||||
else
|
||||
{
|
||||
InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options);
|
||||
pipeline = interpreter_select.buildQueryPipeline();
|
||||
}
|
||||
}
|
||||
|
||||
pipeline.dropTotalsAndExtremes();
|
||||
|
||||
/// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values.
|
||||
if (getContext()->getSettingsRef().insert_null_as_default)
|
||||
{
|
||||
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
|
||||
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
|
||||
const auto & output_columns = metadata_snapshot->getColumns();
|
||||
|
||||
if (input_columns.size() == query_columns.size())
|
||||
{
|
||||
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
|
||||
{
|
||||
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
|
||||
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
|
||||
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type)
|
||||
&& !isNullableOrLowCardinalityNullable(query_columns[col_idx].type)
|
||||
&& !isVariant(query_columns[col_idx].type)
|
||||
&& !isDynamic(query_columns[col_idx].type)
|
||||
&& output_columns.has(query_columns[col_idx].name))
|
||||
{
|
||||
query_sample_block.setColumn(
|
||||
col_idx,
|
||||
ColumnWithTypeAndName(
|
||||
makeNullableOrLowCardinalityNullable(query_columns[col_idx].column),
|
||||
makeNullableOrLowCardinalityNullable(query_columns[col_idx].type),
|
||||
query_columns[col_idx].name));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||
pipeline.getHeader().getColumnsWithTypeAndName(),
|
||||
query_sample_block.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Position);
|
||||
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<ExpressionTransform>(in_header, actions);
|
||||
});
|
||||
|
||||
/// We need to convert Sparse columns to full, because it's destination storage
|
||||
/// may not support it or may have different settings for applying Sparse serialization.
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<MaterializingTransform>(in_header);
|
||||
});
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
auto context_ptr = getContext();
|
||||
auto counting = std::make_shared<CountingTransform>(in_header, nullptr, context_ptr->getQuota());
|
||||
counting->setProcessListElement(context_ptr->getProcessListElement());
|
||||
counting->setProgressCallback(context_ptr->getProgressCallback());
|
||||
|
||||
return counting;
|
||||
});
|
||||
|
||||
size_t num_select_threads = pipeline.getNumThreads();
|
||||
|
||||
pipeline.resize(1);
|
||||
|
||||
if (shouldAddSquashingFroStorage(table))
|
||||
{
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<PlanSquashingTransform>(
|
||||
in_header,
|
||||
table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||
table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL);
|
||||
});
|
||||
}
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<DeduplicationToken::AddTokenInfoTransform>(in_header);
|
||||
});
|
||||
|
||||
if (!settings.insert_deduplication_token.value.empty())
|
||||
{
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<DeduplicationToken::SetUserTokenTransform>(settings.insert_deduplication_token.value, in_header);
|
||||
});
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<DeduplicationToken::SetSourceBlockNumberTransform>(in_header);
|
||||
});
|
||||
}
|
||||
|
||||
/// Number of streams works like this:
|
||||
/// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever
|
||||
/// InterpreterSelectQuery ends up with.
|
||||
/// * Use `max_insert_threads` streams for various insert-preparation steps, e.g.
|
||||
/// materializing and squashing (too slow to do in one thread). That's `presink_chains`.
|
||||
/// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage.
|
||||
/// Otherwise ResizeProcessor them down to 1 stream.
|
||||
|
||||
size_t presink_streams_size = std::max<size_t>(settings.max_insert_threads, pipeline.getNumStreams());
|
||||
|
||||
size_t sink_streams_size = table->supportsParallelInsert() ? std::max<size_t>(1, settings.max_insert_threads) : 1;
|
||||
|
||||
if (!settings.parallel_view_processing)
|
||||
{
|
||||
auto table_id = table->getStorageID();
|
||||
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
|
||||
|
||||
if (table->isView() || !views.empty())
|
||||
sink_streams_size = 1;
|
||||
}
|
||||
|
||||
auto [presink_chains, sink_chains] = buildPreAndSinkChains(
|
||||
presink_streams_size, sink_streams_size,
|
||||
table, metadata_snapshot, query_sample_block);
|
||||
|
||||
pipeline.resize(presink_chains.size());
|
||||
|
||||
if (shouldAddSquashingFroStorage(table))
|
||||
{
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<ApplySquashingTransform>(
|
||||
in_header,
|
||||
table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||
table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL);
|
||||
});
|
||||
}
|
||||
|
||||
for (auto & chain : presink_chains)
|
||||
pipeline.addResources(chain.detachResources());
|
||||
pipeline.addChains(std::move(presink_chains));
|
||||
|
||||
pipeline.resize(sink_streams_size);
|
||||
|
||||
for (auto & chain : sink_chains)
|
||||
pipeline.addResources(chain.detachResources());
|
||||
pipeline.addChains(std::move(sink_chains));
|
||||
|
||||
if (!settings.parallel_view_processing)
|
||||
{
|
||||
/// Don't use more threads for INSERT than for SELECT to reduce memory consumption.
|
||||
if (pipeline.getNumThreads() > num_select_threads)
|
||||
pipeline.setMaxThreads(num_select_threads);
|
||||
}
|
||||
else if (pipeline.getNumThreads() < settings.max_threads)
|
||||
{
|
||||
/// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select,
|
||||
/// however in case of parallel_view_processing and multiple views, views can still be processed in parallel.
|
||||
///
|
||||
/// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads.
|
||||
pipeline.setMaxThreads(settings.max_threads);
|
||||
}
|
||||
|
||||
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<EmptySink>(cur_header);
|
||||
});
|
||||
|
||||
return QueryPipelineBuilder::getPipeline(std::move(pipeline));
|
||||
}
|
||||
|
||||
|
||||
QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table)
|
||||
{
|
||||
const Settings & settings = getContext()->getSettingsRef();
|
||||
|
||||
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
||||
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
||||
|
||||
Chain chain;
|
||||
|
||||
{
|
||||
auto [presink_chains, sink_chains] = buildPreAndSinkChains(
|
||||
/* presink_streams */1, /* sink_streams */1,
|
||||
table, metadata_snapshot, query_sample_block);
|
||||
|
||||
chain = std::move(presink_chains.front());
|
||||
chain.appendChain(std::move(sink_chains.front()));
|
||||
}
|
||||
|
||||
if (!settings.insert_deduplication_token.value.empty())
|
||||
{
|
||||
chain.addSource(std::make_shared<DeduplicationToken::SetSourceBlockNumberTransform>(chain.getInputHeader()));
|
||||
chain.addSource(std::make_shared<DeduplicationToken::SetUserTokenTransform>(settings.insert_deduplication_token.value, chain.getInputHeader()));
|
||||
}
|
||||
|
||||
chain.addSource(std::make_shared<DeduplicationToken::AddTokenInfoTransform>(chain.getInputHeader()));
|
||||
|
||||
if (shouldAddSquashingFroStorage(table))
|
||||
{
|
||||
bool table_prefers_large_blocks = table->prefersLargeBlocks();
|
||||
|
||||
auto squashing = std::make_shared<ApplySquashingTransform>(
|
||||
chain.getInputHeader(),
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||
|
||||
chain.addSource(std::move(squashing));
|
||||
|
||||
auto balancing = std::make_shared<PlanSquashingTransform>(
|
||||
chain.getInputHeader(),
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||
|
||||
chain.addSource(std::move(balancing));
|
||||
}
|
||||
|
||||
auto context_ptr = getContext();
|
||||
auto counting = std::make_shared<CountingTransform>(chain.getInputHeader(), nullptr, context_ptr->getQuota());
|
||||
counting->setProcessListElement(context_ptr->getProcessListElement());
|
||||
counting->setProgressCallback(context_ptr->getProgressCallback());
|
||||
chain.addSource(std::move(counting));
|
||||
|
||||
QueryPipeline pipeline = QueryPipeline(std::move(chain));
|
||||
|
||||
pipeline.setNumThreads(std::min<size_t>(pipeline.getNumThreads(), settings.max_threads));
|
||||
pipeline.setConcurrencyControl(settings.use_concurrency_control);
|
||||
|
||||
if (query.hasInlinedData() && !async_insert)
|
||||
{
|
||||
/// can execute without additional data
|
||||
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
|
||||
for (auto && buffer : owned_buffers)
|
||||
format->addBuffer(std::move(buffer));
|
||||
|
||||
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
|
||||
pipeline.complete(std::move(pipe));
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
|
||||
BlockIO InterpreterInsertQuery::execute()
|
||||
{
|
||||
const Settings & settings = getContext()->getSettingsRef();
|
||||
auto & query = query_ptr->as<ASTInsertQuery &>();
|
||||
|
||||
QueryPipelineBuilder pipeline;
|
||||
std::optional<QueryPipeline> distributed_pipeline;
|
||||
QueryPlanResourceHolder resources;
|
||||
|
||||
StoragePtr table = getTable(query);
|
||||
checkStorageSupportsTransactionsIfNeeded(table, getContext());
|
||||
|
||||
StoragePtr inner_table;
|
||||
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
|
||||
inner_table = mv->getTargetTable();
|
||||
|
||||
if (query.partition_by && !table->supportsPartitionBy())
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
|
||||
|
||||
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
|
||||
|
||||
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
||||
|
||||
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
||||
|
||||
/// For table functions we check access while executing
|
||||
@ -756,43 +423,320 @@ BlockIO InterpreterInsertQuery::execute()
|
||||
if (!query.table_function)
|
||||
getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
|
||||
|
||||
if (!allow_materialized)
|
||||
if (query.select && settings.parallel_distributed_insert_select)
|
||||
// Distributed INSERT SELECT
|
||||
distributed_pipeline = table->distributedWrite(query, getContext());
|
||||
|
||||
std::vector<Chain> presink_chains;
|
||||
std::vector<Chain> sink_chains;
|
||||
if (!distributed_pipeline)
|
||||
{
|
||||
for (const auto & column : metadata_snapshot->getColumns())
|
||||
if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name);
|
||||
/// Number of streams works like this:
|
||||
/// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever
|
||||
/// InterpreterSelectQuery ends up with.
|
||||
/// * Use `max_insert_threads` streams for various insert-preparation steps, e.g.
|
||||
/// materializing and squashing (too slow to do in one thread). That's `presink_chains`.
|
||||
/// * If the table supports parallel inserts, use the same streams for writing to IStorage.
|
||||
/// Otherwise ResizeProcessor them down to 1 stream.
|
||||
/// * If it's not an INSERT SELECT, forget all that and use one stream.
|
||||
size_t pre_streams_size = 1;
|
||||
size_t sink_streams_size = 1;
|
||||
|
||||
if (query.select)
|
||||
{
|
||||
bool is_trivial_insert_select = false;
|
||||
|
||||
if (settings.optimize_trivial_insert_select)
|
||||
{
|
||||
const auto & select_query = query.select->as<ASTSelectWithUnionQuery &>();
|
||||
const auto & selects = select_query.list_of_selects->children;
|
||||
const auto & union_modes = select_query.list_of_modes;
|
||||
|
||||
/// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries
|
||||
const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; };
|
||||
|
||||
is_trivial_insert_select =
|
||||
std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all))
|
||||
&& std::all_of(selects.begin(), selects.end(), isTrivialSelect);
|
||||
}
|
||||
|
||||
if (is_trivial_insert_select)
|
||||
{
|
||||
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
|
||||
* don't need to process SELECT with more than max_insert_threads
|
||||
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
|
||||
* to avoid unnecessary squashing.
|
||||
*/
|
||||
|
||||
Settings new_settings = getContext()->getSettings();
|
||||
|
||||
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
|
||||
|
||||
if (table->prefersLargeBlocks())
|
||||
{
|
||||
if (settings.min_insert_block_size_rows)
|
||||
new_settings.max_block_size = settings.min_insert_block_size_rows;
|
||||
if (settings.min_insert_block_size_bytes)
|
||||
new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes;
|
||||
}
|
||||
|
||||
auto new_context = Context::createCopy(context);
|
||||
new_context->setSettings(new_settings);
|
||||
new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames());
|
||||
|
||||
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
|
||||
|
||||
if (settings.allow_experimental_analyzer)
|
||||
{
|
||||
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options);
|
||||
pipeline = interpreter_select_analyzer.buildQueryPipeline();
|
||||
}
|
||||
else
|
||||
{
|
||||
InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options);
|
||||
pipeline = interpreter_select.buildQueryPipeline();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
|
||||
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
|
||||
|
||||
if (settings.allow_experimental_analyzer)
|
||||
{
|
||||
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options);
|
||||
pipeline = interpreter_select_analyzer.buildQueryPipeline();
|
||||
}
|
||||
else
|
||||
{
|
||||
InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options);
|
||||
pipeline = interpreter_select.buildQueryPipeline();
|
||||
}
|
||||
}
|
||||
|
||||
pipeline.dropTotalsAndExtremes();
|
||||
|
||||
if (settings.max_insert_threads > 1)
|
||||
{
|
||||
auto table_id = table->getStorageID();
|
||||
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
|
||||
|
||||
/// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them.
|
||||
/// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts.
|
||||
const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert();
|
||||
pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads
|
||||
: std::min<size_t>(settings.max_insert_threads, pipeline.getNumStreams());
|
||||
|
||||
/// Deduplication when passing insert_deduplication_token breaks if using more than one thread
|
||||
if (!settings.insert_deduplication_token.toString().empty())
|
||||
{
|
||||
LOG_DEBUG(
|
||||
getLogger("InsertQuery"),
|
||||
"Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues");
|
||||
pre_streams_size = 1;
|
||||
}
|
||||
|
||||
if (table->supportsParallelInsert())
|
||||
sink_streams_size = pre_streams_size;
|
||||
}
|
||||
|
||||
pipeline.resize(pre_streams_size);
|
||||
|
||||
/// Allow to insert Nullable into non-Nullable columns, NULL values will be added as default values.
|
||||
if (getContext()->getSettingsRef().insert_null_as_default)
|
||||
{
|
||||
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
|
||||
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
|
||||
const auto & output_columns = metadata_snapshot->getColumns();
|
||||
|
||||
if (input_columns.size() == query_columns.size())
|
||||
{
|
||||
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
|
||||
{
|
||||
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
|
||||
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
|
||||
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type)
|
||||
&& !isNullableOrLowCardinalityNullable(query_columns[col_idx].type)
|
||||
&& !isVariant(query_columns[col_idx].type)
|
||||
&& !isDynamic(query_columns[col_idx].type)
|
||||
&& output_columns.has(query_columns[col_idx].name))
|
||||
query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ThreadGroupPtr running_group;
|
||||
if (current_thread)
|
||||
running_group = current_thread->getThreadGroup();
|
||||
if (!running_group)
|
||||
running_group = std::make_shared<ThreadGroup>(getContext());
|
||||
for (size_t i = 0; i < sink_streams_size; ++i)
|
||||
{
|
||||
auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr,
|
||||
running_group, /* elapsed_counter_ms= */ nullptr);
|
||||
sink_chains.emplace_back(std::move(out));
|
||||
}
|
||||
for (size_t i = 0; i < pre_streams_size; ++i)
|
||||
{
|
||||
auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block);
|
||||
presink_chains.emplace_back(std::move(out));
|
||||
}
|
||||
}
|
||||
|
||||
BlockIO res;
|
||||
|
||||
if (query.select)
|
||||
/// What type of query: INSERT or INSERT SELECT or INSERT WATCH?
|
||||
if (distributed_pipeline)
|
||||
{
|
||||
if (settings.parallel_distributed_insert_select)
|
||||
res.pipeline = std::move(*distributed_pipeline);
|
||||
}
|
||||
else if (query.select)
|
||||
{
|
||||
const auto & header = presink_chains.at(0).getInputHeader();
|
||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||
pipeline.getHeader().getColumnsWithTypeAndName(),
|
||||
header.getColumnsWithTypeAndName(),
|
||||
ActionsDAG::MatchColumnsMode::Position);
|
||||
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
auto distributed = table->distributedWrite(query, getContext());
|
||||
if (distributed)
|
||||
{
|
||||
res.pipeline = std::move(*distributed);
|
||||
}
|
||||
else
|
||||
{
|
||||
res.pipeline = buildInsertSelectPipeline(query, table);
|
||||
}
|
||||
}
|
||||
else
|
||||
return std::make_shared<ExpressionTransform>(in_header, actions);
|
||||
});
|
||||
|
||||
/// We need to convert Sparse columns to full, because it's destination storage
|
||||
/// may not support it or may have different settings for applying Sparse serialization.
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
res.pipeline = buildInsertSelectPipeline(query, table);
|
||||
return std::make_shared<MaterializingTransform>(in_header);
|
||||
});
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
auto context_ptr = getContext();
|
||||
auto counting = std::make_shared<CountingTransform>(in_header, nullptr, context_ptr->getQuota());
|
||||
counting->setProcessListElement(context_ptr->getProcessListElement());
|
||||
counting->setProgressCallback(context_ptr->getProgressCallback());
|
||||
|
||||
return counting;
|
||||
});
|
||||
|
||||
if (shouldAddSquashingFroStorage(table))
|
||||
{
|
||||
bool table_prefers_large_blocks = table->prefersLargeBlocks();
|
||||
|
||||
size_t threads = presink_chains.size();
|
||||
|
||||
pipeline.resize(1);
|
||||
|
||||
pipeline.addTransform(std::make_shared<PlanSquashingTransform>(
|
||||
header,
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
|
||||
|
||||
pipeline.resize(threads);
|
||||
|
||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<ApplySquashingTransform>(
|
||||
in_header,
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||
});
|
||||
}
|
||||
|
||||
size_t num_select_threads = pipeline.getNumThreads();
|
||||
|
||||
for (auto & chain : presink_chains)
|
||||
resources = chain.detachResources();
|
||||
for (auto & chain : sink_chains)
|
||||
resources = chain.detachResources();
|
||||
|
||||
pipeline.addChains(std::move(presink_chains));
|
||||
pipeline.resize(sink_chains.size());
|
||||
pipeline.addChains(std::move(sink_chains));
|
||||
|
||||
if (!settings.parallel_view_processing)
|
||||
{
|
||||
/// Don't use more threads for INSERT than for SELECT to reduce memory consumption.
|
||||
if (pipeline.getNumThreads() > num_select_threads)
|
||||
pipeline.setMaxThreads(num_select_threads);
|
||||
}
|
||||
else if (pipeline.getNumThreads() < settings.max_threads)
|
||||
{
|
||||
/// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select,
|
||||
/// however in case of parallel_view_processing and multiple views, views can still be processed in parallel.
|
||||
///
|
||||
/// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads.
|
||||
pipeline.setMaxThreads(settings.max_threads);
|
||||
}
|
||||
|
||||
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<EmptySink>(cur_header);
|
||||
});
|
||||
|
||||
if (!allow_materialized)
|
||||
{
|
||||
for (const auto & column : metadata_snapshot->getColumns())
|
||||
if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name);
|
||||
}
|
||||
|
||||
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline));
|
||||
}
|
||||
else
|
||||
{
|
||||
res.pipeline = buildInsertPipeline(query, table);
|
||||
auto & chain = presink_chains.at(0);
|
||||
chain.appendChain(std::move(sink_chains.at(0)));
|
||||
|
||||
if (shouldAddSquashingFroStorage(table))
|
||||
{
|
||||
bool table_prefers_large_blocks = table->prefersLargeBlocks();
|
||||
|
||||
auto squashing = std::make_shared<ApplySquashingTransform>(
|
||||
chain.getInputHeader(),
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||
|
||||
chain.addSource(std::move(squashing));
|
||||
|
||||
auto balancing = std::make_shared<PlanSquashingTransform>(
|
||||
chain.getInputHeader(),
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||
|
||||
chain.addSource(std::move(balancing));
|
||||
}
|
||||
|
||||
auto context_ptr = getContext();
|
||||
auto counting = std::make_shared<CountingTransform>(chain.getInputHeader(), nullptr, context_ptr->getQuota());
|
||||
counting->setProcessListElement(context_ptr->getProcessListElement());
|
||||
counting->setProgressCallback(context_ptr->getProgressCallback());
|
||||
chain.addSource(std::move(counting));
|
||||
|
||||
res.pipeline = QueryPipeline(std::move(presink_chains[0]));
|
||||
res.pipeline.setNumThreads(std::min<size_t>(res.pipeline.getNumThreads(), settings.max_threads));
|
||||
res.pipeline.setConcurrencyControl(settings.use_concurrency_control);
|
||||
|
||||
if (query.hasInlinedData() && !async_insert)
|
||||
{
|
||||
/// can execute without additional data
|
||||
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
|
||||
for (auto && buffer : owned_buffers)
|
||||
format->addBuffer(std::move(buffer));
|
||||
|
||||
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
|
||||
res.pipeline.complete(std::move(pipe));
|
||||
}
|
||||
}
|
||||
|
||||
res.pipeline.addStorageHolder(table);
|
||||
res.pipeline.addResources(std::move(resources));
|
||||
|
||||
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
|
||||
res.pipeline.addStorageHolder(mv->getTargetTable());
|
||||
res.pipeline.addStorageHolder(table);
|
||||
if (inner_table)
|
||||
res.pipeline.addStorageHolder(inner_table);
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -813,27 +757,17 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont
}
}


void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const
{
extendQueryLogElemImpl(elem, context_);
}


void registerInterpreterInsertQuery(InterpreterFactory & factory)
{
auto create_fn = [] (const InterpreterFactory::Arguments & args)
{
return std::make_unique<InterpreterInsertQuery>(
args.query,
args.context,
args.allow_materialized,
/* no_squash */false,
/* no_destination */false,
/* async_insert */false);
return std::make_unique<InterpreterInsertQuery>(args.query, args.context, args.allow_materialized);
};
factory.registerInterpreter("InterpreterInsertQuery", create_fn);
}


}

@ -23,10 +23,10 @@ public:
InterpreterInsertQuery(
const ASTPtr & query_ptr_,
ContextPtr context_,
bool allow_materialized_,
bool no_squash_,
bool no_destination,
bool async_insert_);
bool allow_materialized_ = false,
bool no_squash_ = false,
bool no_destination_ = false,
bool async_insert_ = false);

/** Prepare a request for execution. Return block streams
* - the stream into which you can write data to execute the query, if INSERT;
@ -73,17 +73,12 @@ private:

ASTPtr query_ptr;
const bool allow_materialized;
bool no_squash = false;
bool no_destination = false;
const bool no_squash;
const bool no_destination;
const bool async_insert;

std::vector<std::unique_ptr<ReadBuffer>> owned_buffers;

std::pair<std::vector<Chain>, std::vector<Chain>> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block);

QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table);
QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table);

Chain buildSink(
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot,
@ -1,7 +1,6 @@
#include <vector>
#include <Interpreters/Squashing.h>
#include <Common/CurrentThread.h>
#include <base/defines.h>


namespace DB
@ -12,33 +11,24 @@ namespace ErrorCodes
}

Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_)
: min_block_size_rows(min_block_size_rows_)
: header(header_)
, min_block_size_rows(min_block_size_rows_)
, min_block_size_bytes(min_block_size_bytes_)
, header(header_)
{
}

Chunk Squashing::flush()
{
if (!accumulated)
return {};

auto result = convertToChunk(accumulated.extract());
chassert(result);
return result;
return convertToChunk(std::move(chunks_to_merge_vec));
}

Chunk Squashing::squash(Chunk && input_chunk)
|
||||
{
|
||||
if (!input_chunk)
|
||||
if (!input_chunk.hasChunkInfo())
|
||||
return Chunk();
|
||||
|
||||
auto squash_info = input_chunk.getChunkInfos().extract<ChunksToSquash>();
|
||||
|
||||
if (!squash_info)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
|
||||
|
||||
return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos()));
|
||||
const auto *info = getInfoFromChunk(input_chunk);
|
||||
return squash(info->chunks);
|
||||
}
|
||||
|
||||
Chunk Squashing::add(Chunk && input_chunk)
|
||||
@ -47,37 +37,48 @@ Chunk Squashing::add(Chunk && input_chunk)
|
||||
return {};
|
||||
|
||||
/// Just read block is already enough.
|
||||
if (isEnoughSize(input_chunk))
|
||||
if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes()))
|
||||
{
|
||||
/// If no accumulated data, return just read block.
|
||||
if (!accumulated)
|
||||
if (chunks_to_merge_vec.empty())
|
||||
{
|
||||
accumulated.add(std::move(input_chunk));
|
||||
return convertToChunk(accumulated.extract());
|
||||
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
||||
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
||||
chunks_to_merge_vec.clear();
|
||||
return res_chunk;
|
||||
}
|
||||
|
||||
/// Return accumulated data (maybe it has small size) and place new block to accumulated data.
|
||||
Chunk res_chunk = convertToChunk(accumulated.extract());
|
||||
accumulated.add(std::move(input_chunk));
|
||||
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
||||
chunks_to_merge_vec.clear();
|
||||
changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
|
||||
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
||||
return res_chunk;
|
||||
}
|
||||
|
||||
/// Accumulated block is already enough.
|
||||
if (isEnoughSize())
|
||||
if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes))
|
||||
{
|
||||
/// Return accumulated data and place new block to accumulated data.
|
||||
Chunk res_chunk = convertToChunk(accumulated.extract());
|
||||
accumulated.add(std::move(input_chunk));
|
||||
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
||||
chunks_to_merge_vec.clear();
|
||||
changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
|
||||
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
||||
return res_chunk;
|
||||
}
|
||||
|
||||
/// Pushing data into accumulating vector
|
||||
accumulated.add(std::move(input_chunk));
|
||||
expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
|
||||
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
||||
|
||||
/// If accumulated data is big enough, we send it
|
||||
if (isEnoughSize())
|
||||
return convertToChunk(accumulated.extract());
|
||||
|
||||
if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes))
|
||||
{
|
||||
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
||||
changeCurrentSize(0, 0);
|
||||
chunks_to_merge_vec.clear();
|
||||
return res_chunk;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -89,15 +90,14 @@ Chunk Squashing::convertToChunk(std::vector<Chunk> && chunks) const
auto info = std::make_shared<ChunksToSquash>();
info->chunks = std::move(chunks);

// It is important that chunk is not empty, it has to have columns even if they are empty
auto aggr_chunk = Chunk(header.getColumns(), 0);
aggr_chunk.getChunkInfos().add(std::move(info));
chassert(aggr_chunk);
return aggr_chunk;
chunks.clear();

return Chunk(header.cloneEmptyColumns(), 0, info);
}

Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoCollection && infos)
|
||||
Chunk Squashing::squash(std::vector<Chunk> & input_chunks)
|
||||
{
|
||||
Chunk accumulated_chunk;
|
||||
std::vector<IColumn::MutablePtr> mutable_columns = {};
|
||||
size_t rows = 0;
|
||||
for (const Chunk & chunk : input_chunks)
|
||||
@ -119,17 +119,35 @@ Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoColl
|
||||
for (size_t j = 0, size = mutable_columns.size(); j < size; ++j)
|
||||
{
|
||||
const auto source_column = columns[j];
|
||||
|
||||
mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size());
|
||||
}
|
||||
}
|
||||
accumulated_chunk.setColumns(std::move(mutable_columns), rows);
|
||||
return accumulated_chunk;
|
||||
}
|
||||
|
||||
Chunk result;
|
||||
result.setColumns(std::move(mutable_columns), rows);
|
||||
result.setChunkInfos(infos);
|
||||
result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos()));
|
||||
const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk)
|
||||
{
|
||||
const auto& info = chunk.getChunkInfo();
|
||||
const auto * agg_info = typeid_cast<const ChunksToSquash *>(info.get());
|
||||
|
||||
chassert(result);
|
||||
return result;
|
||||
if (!agg_info)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
|
||||
|
||||
return agg_info;
|
||||
}
|
||||
|
||||
void Squashing::expandCurrentSize(size_t rows, size_t bytes)
|
||||
{
|
||||
accumulated_size.rows += rows;
|
||||
accumulated_size.bytes += bytes;
|
||||
}
|
||||
|
||||
void Squashing::changeCurrentSize(size_t rows, size_t bytes)
|
||||
{
|
||||
accumulated_size.rows = rows;
|
||||
accumulated_size.bytes = bytes;
|
||||
}
|
||||
|
||||
bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
|
||||
@ -138,28 +156,4 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
|
||||
|| (min_block_size_rows && rows >= min_block_size_rows)
|
||||
|| (min_block_size_bytes && bytes >= min_block_size_bytes);
|
||||
}
|
||||
|
||||
bool Squashing::isEnoughSize() const
|
||||
{
|
||||
return isEnoughSize(accumulated.getRows(), accumulated.getBytes());
|
||||
};
|
||||
|
||||
bool Squashing::isEnoughSize(const Chunk & chunk) const
|
||||
{
|
||||
return isEnoughSize(chunk.getNumRows(), chunk.bytes());
|
||||
}
|
||||
|
||||
void Squashing::CurrentSize::add(Chunk && chunk)
|
||||
{
|
||||
rows += chunk.getNumRows();
|
||||
bytes += chunk.bytes();
|
||||
chunks.push_back(std::move(chunk));
|
||||
}
|
||||
|
||||
std::vector<Chunk> Squashing::CurrentSize::extract()
|
||||
{
|
||||
auto result = std::move(chunks);
|
||||
*this = {};
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -8,18 +8,9 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ChunksToSquash : public ChunkInfoCloneable<ChunksToSquash>
|
||||
struct ChunksToSquash : public ChunkInfo
|
||||
{
|
||||
public:
|
||||
ChunksToSquash() = default;
|
||||
ChunksToSquash(const ChunksToSquash & other)
|
||||
{
|
||||
chunks.reserve(other.chunks.size());
|
||||
for (const auto & chunk: other.chunks)
|
||||
chunks.push_back(chunk.clone());
|
||||
}
|
||||
|
||||
std::vector<Chunk> chunks = {};
|
||||
mutable std::vector<Chunk> chunks = {};
|
||||
};
|
||||
|
||||
/** Merging consecutive passed blocks to specified minimum size.
|
||||
@ -45,35 +36,32 @@ public:
|
||||
static Chunk squash(Chunk && input_chunk);
|
||||
Chunk flush();
|
||||
|
||||
void setHeader(Block header_) { header = std::move(header_); }
|
||||
const Block & getHeader() const { return header; }
|
||||
|
||||
private:
|
||||
class CurrentSize
|
||||
bool isDataLeft()
|
||||
{
|
||||
return !chunks_to_merge_vec.empty();
|
||||
}
|
||||
|
||||
Block header;
|
||||
private:
|
||||
struct CurrentSize
|
||||
{
|
||||
std::vector<Chunk> chunks = {};
|
||||
size_t rows = 0;
|
||||
size_t bytes = 0;
|
||||
|
||||
public:
|
||||
explicit operator bool () const { return !chunks.empty(); }
|
||||
size_t getRows() const { return rows; }
|
||||
size_t getBytes() const { return bytes; }
|
||||
void add(Chunk && chunk);
|
||||
std::vector<Chunk> extract();
|
||||
};
|
||||
|
||||
const size_t min_block_size_rows;
|
||||
const size_t min_block_size_bytes;
|
||||
Block header;
|
||||
std::vector<Chunk> chunks_to_merge_vec = {};
|
||||
size_t min_block_size_rows;
|
||||
size_t min_block_size_bytes;
|
||||
|
||||
CurrentSize accumulated;
|
||||
CurrentSize accumulated_size;
|
||||
|
||||
static Chunk squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoCollection && infos);
|
||||
static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk);
|
||||
|
||||
bool isEnoughSize() const;
|
||||
static Chunk squash(std::vector<Chunk> & input_chunks);
|
||||
|
||||
void expandCurrentSize(size_t rows, size_t bytes);
|
||||
void changeCurrentSize(size_t rows, size_t bytes);
|
||||
bool isEnoughSize(size_t rows, size_t bytes) const;
|
||||
bool isEnoughSize(const Chunk & chunk) const;
|
||||
|
||||
Chunk convertToChunk(std::vector<Chunk> && chunks) const;
|
||||
};
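For orientation, the accumulation policy that the Squashing class above implements can be shown with a minimal standalone sketch: blocks are buffered until either the row or the byte threshold is reached, then handed out as one merged block, and flush() drains whatever remains at end of stream. The names below (SquashingSketch, Block) are illustrative, and the hand-off of the block that trips the threshold is simplified relative to the real add().

// Standalone illustration only: this mirrors the thresholding idea of Squashing,
// not the ClickHouse API, and it simplifies the hand-off of the in-flight block.
#include <cstddef>
#include <iostream>
#include <optional>

struct Block { std::size_t rows = 0; std::size_t bytes = 0; };

class SquashingSketch
{
public:
    SquashingSketch(std::size_t min_rows_, std::size_t min_bytes_) : min_rows(min_rows_), min_bytes(min_bytes_) {}

    /// Buffer the block; hand out one merged block once a threshold is reached.
    std::optional<Block> add(const Block & block)
    {
        accumulated.rows += block.rows;
        accumulated.bytes += block.bytes;
        if (isEnoughSize())
        {
            Block result = accumulated;
            accumulated = {};
            return result;
        }
        return std::nullopt;
    }

    /// Drain whatever is left at the end of the stream.
    std::optional<Block> flush()
    {
        if (accumulated.rows == 0 && accumulated.bytes == 0)
            return std::nullopt;
        Block result = accumulated;
        accumulated = {};
        return result;
    }

private:
    bool isEnoughSize() const
    {
        /// A zero threshold disables the corresponding check, as with min_block_size_rows / min_block_size_bytes.
        return (min_rows && accumulated.rows >= min_rows) || (min_bytes && accumulated.bytes >= min_bytes);
    }

    std::size_t min_rows;
    std::size_t min_bytes;
    Block accumulated;
};

int main()
{
    SquashingSketch squashing(/*min_rows=*/1000, /*min_bytes=*/1 << 20);
    for (int i = 0; i < 5; ++i)
        if (auto block = squashing.add({300, 10000}))
            std::cout << "emit " << block->rows << " rows\n"; /// fires on the 4th block (1200 rows buffered)
    if (auto block = squashing.flush())
        std::cout << "final " << block->rows << " rows\n"; /// the remaining 300 rows
}

A zero value for either threshold simply disables that check, which is how the INSERT pipeline above passes 0ULL for the byte threshold when the table does not prefer large blocks.
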
@ -538,13 +538,7 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
insert_context->makeQueryContext();
addSettingsForQuery(insert_context, IAST::QueryKind::Insert);

InterpreterInsertQuery interpreter(
query_ptr,
insert_context,
/* allow_materialized */ false,
/* no_squash */ false,
/* no_destination */ false,
/* async_insert */ false);
InterpreterInsertQuery interpreter(query_ptr, insert_context);
BlockIO io = interpreter.execute();

PushingPipelineExecutor executor(io.pipeline);
@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
||||
}
|
||||
}
|
||||
|
||||
/// Check for dynamic subcolumns in unknown required columns.
|
||||
/// Check for dynamic subcolums in unknown required columns.
|
||||
if (!unknown_required_source_columns.empty())
|
||||
{
|
||||
for (const NameAndTypePair & pair : source_columns_ordinary)
|
||||
|
@ -1129,11 +1129,11 @@ inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node,
|
||||
|
||||
if (hex)
|
||||
{
|
||||
hexStringDecode(str_begin, str_end, res_pos);
|
||||
hexStringDecode(str_begin, str_end, res_pos, word_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
binStringDecode(str_begin, str_end, res_pos);
|
||||
binStringDecode(str_begin, str_end, res_pos, word_size);
|
||||
}
|
||||
|
||||
return makeStringLiteral(pos, node, String(reinterpret_cast<char *>(res.data()), (res_pos - res_begin - 1)));
|
||||
|
@ -19,6 +19,14 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns
|
||||
checkNumRowsIsConsistent();
|
||||
}
|
||||
|
||||
Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
|
||||
: columns(std::move(columns_))
|
||||
, num_rows(num_rows_)
|
||||
, chunk_info(std::move(chunk_info_))
|
||||
{
|
||||
checkNumRowsIsConsistent();
|
||||
}
|
||||
|
||||
static Columns unmuteColumns(MutableColumns && mutable_columns)
|
||||
{
|
||||
Columns columns;
|
||||
@ -35,11 +43,17 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_)
|
||||
checkNumRowsIsConsistent();
|
||||
}
|
||||
|
||||
Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
|
||||
: columns(unmuteColumns(std::move(columns_)))
|
||||
, num_rows(num_rows_)
|
||||
, chunk_info(std::move(chunk_info_))
|
||||
{
|
||||
checkNumRowsIsConsistent();
|
||||
}
|
||||
|
||||
Chunk Chunk::clone() const
|
||||
{
|
||||
auto tmp = Chunk(getColumns(), getNumRows());
|
||||
tmp.setChunkInfos(chunk_infos.clone());
|
||||
return tmp;
|
||||
return Chunk(getColumns(), getNumRows(), chunk_info);
|
||||
}
|
||||
|
||||
void Chunk::setColumns(Columns columns_, UInt64 num_rows_)
|
||||
|
@ -1,9 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/CollectionOfDerived.h>
|
||||
#include <Columns/IColumn.h>
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -11,29 +9,11 @@ namespace DB
|
||||
class ChunkInfo
|
||||
{
|
||||
public:
|
||||
using Ptr = std::shared_ptr<ChunkInfo>;
|
||||
|
||||
ChunkInfo() = default;
|
||||
ChunkInfo(const ChunkInfo&) = default;
|
||||
ChunkInfo(ChunkInfo&&) = default;
|
||||
|
||||
virtual Ptr clone() const = 0;
|
||||
virtual ~ChunkInfo() = default;
|
||||
ChunkInfo() = default;
|
||||
};
|
||||
|
||||
|
||||
template<class Derived>
|
||||
class ChunkInfoCloneable : public ChunkInfo
|
||||
{
|
||||
public:
|
||||
ChunkInfoCloneable() = default;
|
||||
ChunkInfoCloneable(const ChunkInfoCloneable & other) = default;
|
||||
|
||||
Ptr clone() const override
|
||||
{
|
||||
return std::static_pointer_cast<ChunkInfo>(std::make_shared<Derived>(*static_cast<const Derived*>(this)));
|
||||
}
|
||||
};
|
||||
using ChunkInfoPtr = std::shared_ptr<const ChunkInfo>;
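
The single-ChunkInfo pattern that appears throughout the hunks around this point — one ChunkInfoPtr attached to a chunk and later recovered with a checked downcast — can be illustrated with a short standalone sketch; the names are illustrative and this is not ClickHouse code.

// Standalone illustration only: not the ClickHouse API, just the single-ChunkInfo pattern.
#include <iostream>
#include <memory>
#include <stdexcept>

struct InfoBase { virtual ~InfoBase() = default; };
using InfoPtr = std::shared_ptr<const InfoBase>;

struct AggregatedInfo : InfoBase { int bucket_num = -1; bool is_overflows = false; };

struct ChunkSketch
{
    InfoPtr info;
    bool hasInfo() const { return info != nullptr; }
};

int main()
{
    ChunkSketch chunk;

    /// Producer side: attach one typed metadata object to the chunk.
    auto agg = std::make_shared<AggregatedInfo>();
    agg->bucket_num = 3;
    chunk.info = std::move(agg);

    /// Consumer side: recover it with a checked downcast.
    if (!chunk.hasInfo())
        throw std::runtime_error("Chunk info was not set");
    if (const auto * recovered = dynamic_cast<const AggregatedInfo *>(chunk.info.get()))
        std::cout << "bucket " << recovered->bucket_num << '\n';
}

In the hunks that follow, consumers do the same thing with typeid_cast on getChunkInfo(), throwing a LOGICAL_ERROR when the expected info type is missing.
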
|
||||
|
||||
/**
|
||||
* Chunk is a list of columns with the same length.
|
||||
@ -52,26 +32,26 @@ public:
|
||||
class Chunk
|
||||
{
|
||||
public:
|
||||
using ChunkInfoCollection = CollectionOfDerivedItems<ChunkInfo>;
|
||||
|
||||
Chunk() = default;
|
||||
Chunk(const Chunk & other) = delete;
|
||||
Chunk(Chunk && other) noexcept
|
||||
: columns(std::move(other.columns))
|
||||
, num_rows(other.num_rows)
|
||||
, chunk_infos(std::move(other.chunk_infos))
|
||||
, chunk_info(std::move(other.chunk_info))
|
||||
{
|
||||
other.num_rows = 0;
|
||||
}
|
||||
|
||||
Chunk(Columns columns_, UInt64 num_rows_);
|
||||
Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
|
||||
Chunk(MutableColumns columns_, UInt64 num_rows_);
|
||||
Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
|
||||
|
||||
Chunk & operator=(const Chunk & other) = delete;
|
||||
Chunk & operator=(Chunk && other) noexcept
|
||||
{
|
||||
columns = std::move(other.columns);
|
||||
chunk_infos = std::move(other.chunk_infos);
|
||||
chunk_info = std::move(other.chunk_info);
|
||||
num_rows = other.num_rows;
|
||||
other.num_rows = 0;
|
||||
return *this;
|
||||
@ -82,15 +62,15 @@ public:
|
||||
void swap(Chunk & other) noexcept
|
||||
{
|
||||
columns.swap(other.columns);
|
||||
chunk_info.swap(other.chunk_info);
|
||||
std::swap(num_rows, other.num_rows);
|
||||
chunk_infos.swap(other.chunk_infos);
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
num_rows = 0;
|
||||
columns.clear();
|
||||
chunk_infos.clear();
|
||||
chunk_info.reset();
|
||||
}
|
||||
|
||||
const Columns & getColumns() const { return columns; }
|
||||
@ -101,9 +81,9 @@ public:
|
||||
/** Get empty columns with the same types as in block. */
|
||||
MutableColumns cloneEmptyColumns() const;
|
||||
|
||||
ChunkInfoCollection & getChunkInfos() { return chunk_infos; }
|
||||
const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; }
|
||||
void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); }
|
||||
const ChunkInfoPtr & getChunkInfo() const { return chunk_info; }
|
||||
bool hasChunkInfo() const { return chunk_info != nullptr; }
|
||||
void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); }
|
||||
|
||||
UInt64 getNumRows() const { return num_rows; }
|
||||
UInt64 getNumColumns() const { return columns.size(); }
|
||||
@ -127,7 +107,7 @@ public:
|
||||
private:
|
||||
Columns columns;
|
||||
UInt64 num_rows = 0;
|
||||
ChunkInfoCollection chunk_infos;
|
||||
ChunkInfoPtr chunk_info;
|
||||
|
||||
void checkNumRowsIsConsistent();
|
||||
};
|
||||
@ -137,15 +117,11 @@ using Chunks = std::vector<Chunk>;
|
||||
/// AsyncInsert needs two kinds of information:
|
||||
/// - offsets of different sub-chunks
|
||||
/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`.
|
||||
class AsyncInsertInfo : public ChunkInfoCloneable<AsyncInsertInfo>
|
||||
class AsyncInsertInfo : public ChunkInfo
|
||||
{
|
||||
public:
|
||||
AsyncInsertInfo() = default;
|
||||
AsyncInsertInfo(const AsyncInsertInfo & other) = default;
|
||||
AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_)
|
||||
: offsets(offsets_)
|
||||
, tokens(tokens_)
|
||||
{}
|
||||
explicit AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_) : offsets(offsets_), tokens(tokens_) {}
|
||||
|
||||
std::vector<size_t> offsets;
|
||||
std::vector<String> tokens;
|
||||
@ -154,11 +130,9 @@ public:
|
||||
using AsyncInsertInfoPtr = std::shared_ptr<AsyncInsertInfo>;
|
||||
|
||||
/// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
|
||||
class ChunkMissingValues : public ChunkInfoCloneable<ChunkMissingValues>
|
||||
class ChunkMissingValues : public ChunkInfo
|
||||
{
|
||||
public:
|
||||
ChunkMissingValues(const ChunkMissingValues & other) = default;
|
||||
|
||||
using RowsBitMask = std::vector<bool>; /// a bit per row for a column
|
||||
|
||||
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
|
||||
|
@ -147,10 +147,13 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds)
|
||||
|
||||
block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
||||
|
||||
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
||||
if (auto chunk_info = chunk.getChunkInfo())
|
||||
{
|
||||
block.info.bucket_num = agg_info->bucket_num;
|
||||
block.info.is_overflows = agg_info->is_overflows;
|
||||
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(chunk_info.get()))
|
||||
{
|
||||
block.info.bucket_num = agg_info->bucket_num;
|
||||
block.info.is_overflows = agg_info->is_overflows;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -73,10 +73,13 @@ bool PullingPipelineExecutor::pull(Block & block)
|
||||
}
|
||||
|
||||
block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
||||
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
||||
if (auto chunk_info = chunk.getChunkInfo())
|
||||
{
|
||||
block.info.bucket_num = agg_info->bucket_num;
|
||||
block.info.is_overflows = agg_info->is_overflows;
|
||||
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(chunk_info.get()))
|
||||
{
|
||||
block.info.bucket_num = agg_info->bucket_num;
|
||||
block.info.is_overflows = agg_info->is_overflows;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -179,9 +179,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
|
||||
columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count);
|
||||
|
||||
Chunks piece;
|
||||
piece.emplace_back(std::move(columns), count);
|
||||
piece.back().setChunkInfos(concatenated.getChunkInfos());
|
||||
|
||||
piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo());
|
||||
writeRowGroup(std::move(piece));
|
||||
}
|
||||
}
|
||||
|
@ -8,9 +8,8 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header)
|
||||
: IProcessor({std::move(input_header)}, {std::move(output_header)})
|
||||
, input(inputs.front())
|
||||
, output(outputs.front())
|
||||
: IProcessor({std::move(input_header)}, {std::move(output_header)}),
|
||||
input(inputs.front()), output(outputs.front())
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -53,11 +53,13 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num
|
||||
if (!input.chunk.hasRows())
|
||||
return;
|
||||
|
||||
if (input.chunk.getChunkInfos().empty())
|
||||
const auto & info = input.chunk.getChunkInfo();
|
||||
if (!info)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm");
|
||||
|
||||
Int64 allocated_bytes = 0;
|
||||
if (auto arenas_info = input.chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
|
||||
/// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator.
|
||||
if (const auto * arenas_info = typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get()))
|
||||
allocated_bytes = arenas_info->allocated_bytes;
|
||||
|
||||
states[source_num] = State{input.chunk, description, allocated_bytes};
|
||||
@ -134,7 +136,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge()
|
||||
info->chunk_num = chunk_num++;
|
||||
|
||||
Chunk chunk;
|
||||
chunk.getChunkInfos().add(std::move(info));
|
||||
chunk.setChunkInfo(std::move(info));
|
||||
return chunk;
|
||||
}
|
||||
|
||||
@ -161,7 +163,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation()
|
||||
chunks.emplace_back(std::move(new_columns), current_rows);
|
||||
}
|
||||
|
||||
chunks.back().getChunkInfos().add(std::make_shared<AggregatedChunkInfo>());
|
||||
chunks.back().setChunkInfo(std::make_shared<AggregatedChunkInfo>());
|
||||
states[i].current_row = states[i].to_row;
|
||||
|
||||
/// We assume that sizes in bytes of rows are almost the same.
|
||||
|
@ -6,22 +6,18 @@ namespace DB
|
||||
{
|
||||
|
||||
/// To carry part level if chunk is produced by a merge tree source
|
||||
class MergeTreePartLevelInfo : public ChunkInfoCloneable<MergeTreePartLevelInfo>
|
||||
class MergeTreePartLevelInfo : public ChunkInfo
|
||||
{
|
||||
public:
|
||||
MergeTreePartLevelInfo() = delete;
|
||||
explicit MergeTreePartLevelInfo(ssize_t part_level)
|
||||
: origin_merge_tree_part_level(part_level)
|
||||
{ }
|
||||
MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default;
|
||||
|
||||
explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { }
|
||||
size_t origin_merge_tree_part_level = 0;
|
||||
};
|
||||
|
||||
inline size_t getPartLevelFromChunk(const Chunk & chunk)
|
||||
{
|
||||
const auto part_level_info = chunk.getChunkInfos().get<MergeTreePartLevelInfo>();
|
||||
if (part_level_info)
|
||||
const auto & info = chunk.getChunkInfo();
|
||||
if (const auto * part_level_info = typeid_cast<const MergeTreePartLevelInfo *>(info.get()))
|
||||
return part_level_info->origin_merge_tree_part_level;
|
||||
return 0;
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ namespace ErrorCodes
|
||||
|
||||
static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false)
|
||||
{
|
||||
chunk->getChunkInfos().add(std::make_shared<ChunkSelectFinalIndices>(std::move(chunk->replace_final_selection)));
|
||||
chunk->setChunkInfo(std::make_shared<ChunkSelectFinalIndices>(std::move(chunk->replace_final_selection)));
|
||||
return IMergingAlgorithm::Status(std::move(*chunk), finished);
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include <Processors/Merges/Algorithms/MergedData.h>
|
||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||
#include <Processors/Merges/Algorithms/RowRef.h>
|
||||
#include <Processors/Chunk.h>
|
||||
|
||||
namespace Poco
|
||||
{
|
||||
@ -15,13 +14,11 @@ namespace DB
|
||||
|
||||
/** Use in skipping final to keep list of indices of selected row after merging final
|
||||
*/
|
||||
struct ChunkSelectFinalIndices : public ChunkInfoCloneable<ChunkSelectFinalIndices>
|
||||
struct ChunkSelectFinalIndices : public ChunkInfo
|
||||
{
|
||||
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_);
|
||||
ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default;
|
||||
|
||||
const ColumnPtr column_holder;
|
||||
const ColumnUInt64 * select_final_indices = nullptr;
|
||||
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_);
|
||||
};
|
||||
|
||||
/** Merges several sorted inputs into one.
|
||||
|
@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare()
|
||||
bool is_port_full = !output.canPush();
|
||||
|
||||
/// Push if has data.
|
||||
if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full)
|
||||
if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full)
|
||||
output.push(std::move(state.output_chunk));
|
||||
|
||||
if (!is_initialized)
|
||||
|
@ -129,7 +129,7 @@ public:
|
||||
|
||||
IMergingAlgorithm::Status status = algorithm.merge();
|
||||
|
||||
if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty())
|
||||
if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo())
|
||||
{
|
||||
// std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl;
|
||||
state.output_chunk = std::move(status.chunk);
|
||||
|
@ -20,7 +20,7 @@ public:
|
||||
}
|
||||
|
||||
String getName() const override { return "RemoteSink"; }
|
||||
void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); }
|
||||
void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); }
|
||||
void onFinish() override { RemoteInserter::onFinish(); }
|
||||
};
|
||||
|
||||
|
@ -15,8 +15,9 @@ void SinkToStorage::onConsume(Chunk chunk)
|
||||
*/
|
||||
Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns()));
|
||||
|
||||
consume(chunk);
|
||||
cur_chunk = std::move(chunk);
|
||||
consume(chunk.clone());
|
||||
if (!lastBlockIsDuplicate())
|
||||
cur_chunk = std::move(chunk);
|
||||
}
|
||||
|
||||
SinkToStorage::GenerateResult SinkToStorage::onGenerate()
|
||||
|
@ -18,7 +18,8 @@ public:
|
||||
void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }
|
||||
|
||||
protected:
|
||||
virtual void consume(Chunk & chunk) = 0;
|
||||
virtual void consume(Chunk chunk) = 0;
|
||||
virtual bool lastBlockIsDuplicate() const { return false; }
|
||||
|
||||
private:
|
||||
std::vector<TableLockHolder> table_locks;
|
||||
@ -37,7 +38,7 @@ class NullSinkToStorage : public SinkToStorage
|
||||
public:
|
||||
using SinkToStorage::SinkToStorage;
|
||||
std::string getName() const override { return "NullSinkToStorage"; }
|
||||
void consume(Chunk &) override {}
|
||||
void consume(Chunk) override {}
|
||||
};
|
||||
|
||||
using SinkPtr = std::shared_ptr<SinkToStorage>;
|
||||
|
@ -43,10 +43,7 @@ protected:
|
||||
info->bucket_num = res.info.bucket_num;
|
||||
info->is_overflows = res.info.is_overflows;
|
||||
|
||||
auto chunk = Chunk(res.getColumns(), res.rows());
|
||||
chunk.getChunkInfos().add(std::move(info));
|
||||
|
||||
return chunk;
|
||||
return Chunk(res.getColumns(), res.rows(), std::move(info));
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -176,7 +176,7 @@ std::optional<Chunk> RemoteSource::tryGenerate()
|
||||
auto info = std::make_shared<AggregatedChunkInfo>();
|
||||
info->bucket_num = block.info.bucket_num;
|
||||
info->is_overflows = block.info.is_overflows;
|
||||
chunk.getChunkInfos().add(std::move(info));
|
||||
chunk.setChunkInfo(std::move(info));
|
||||
}
|
||||
|
||||
return chunk;
|
||||
|
@ -5,9 +5,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_))
|
||||
{
|
||||
}
|
||||
SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {}
|
||||
|
||||
SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows())
|
||||
{
|
||||
@ -22,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp
|
||||
auto info = std::make_shared<AggregatedChunkInfo>();
|
||||
info->bucket_num = data.info.bucket_num;
|
||||
info->is_overflows = data.info.is_overflows;
|
||||
chunk.getChunkInfos().add(std::move(info));
|
||||
chunk.setChunkInfo(std::move(info));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate()
|
||||
variants.aggregates_pool = variants.aggregates_pools.at(0).get();
|
||||
|
||||
/// Pass info about used memory by aggregate functions further.
|
||||
to_push_chunk.getChunkInfos().add(std::make_shared<ChunkInfoWithAllocatedBytes>(cur_block_bytes));
|
||||
to_push_chunk.setChunkInfo(std::make_shared<ChunkInfoWithAllocatedBytes>(cur_block_bytes));
|
||||
|
||||
cur_block_bytes = 0;
|
||||
cur_block_size = 0;
|
||||
@ -351,12 +351,11 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati
|
||||
void FinalizeAggregatedTransform::transform(Chunk & chunk)
|
||||
{
|
||||
if (params->final)
|
||||
{
|
||||
finalizeChunk(chunk, aggregates_mask);
|
||||
}
|
||||
else if (!chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
||||
else if (!chunk.getChunkInfo())
|
||||
{
|
||||
chunk.getChunkInfos().add(std::make_shared<AggregatedChunkInfo>());
|
||||
auto info = std::make_shared<AggregatedChunkInfo>();
|
||||
chunk.setChunkInfo(std::move(info));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include <Processors/ISimpleTransform.h>
|
||||
#include <Processors/Transforms/AggregatingTransform.h>
|
||||
#include <Processors/Transforms/finalizeChunk.h>
|
||||
#include <Processors/Chunk.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -13,12 +12,10 @@ namespace DB
|
||||
struct InputOrderInfo;
|
||||
using InputOrderInfoPtr = std::shared_ptr<const InputOrderInfo>;
|
||||
|
||||
struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable<ChunkInfoWithAllocatedBytes>
|
||||
struct ChunkInfoWithAllocatedBytes : public ChunkInfo
|
||||
{
|
||||
ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default;
|
||||
explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_)
|
||||
: allocated_bytes(allocated_bytes_) {}
|
||||
|
||||
Int64 allocated_bytes;
|
||||
};
|
||||
|
||||
|
@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block)
|
||||
|
||||
UInt64 num_rows = block.rows();
|
||||
Chunk chunk(block.getColumns(), num_rows);
|
||||
chunk.getChunkInfos().add(std::move(info));
|
||||
chunk.setChunkInfo(std::move(info));
|
||||
|
||||
return chunk;
|
||||
}
|
||||
@ -44,11 +44,15 @@ namespace
|
||||
{
|
||||
const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk)
|
||||
{
|
||||
auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
|
||||
const auto & info = chunk.getChunkInfo();
|
||||
if (!info)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk.");
|
||||
|
||||
const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
|
||||
if (!agg_info)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo.");
|
||||
|
||||
return agg_info.get();
|
||||
return agg_info;
|
||||
}
|
||||
|
||||
/// Reads chunks from file in native format. Provide chunks with aggregation info.
|
||||
@ -206,7 +210,11 @@ private:
|
||||
|
||||
void process(Chunk && chunk)
|
||||
{
|
||||
auto chunks_to_merge = chunk.getChunkInfos().get<ChunksToMerge>();
|
||||
if (!chunk.hasChunkInfo())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName());
|
||||
|
||||
const auto & info = chunk.getChunkInfo();
|
||||
const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get());
|
||||
if (!chunks_to_merge)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName());
|
||||
|
||||
@ -775,7 +783,7 @@ void AggregatingTransform::initGenerate()
|
||||
{
|
||||
/// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes`
|
||||
static constexpr size_t oneMB = 1024 * 1024;
|
||||
return std::make_shared<SimpleSquashingTransform>(header, params->params.max_block_size, oneMB);
|
||||
return std::make_shared<SimpleSquashingChunksTransform>(header, params->params.max_block_size, oneMB);
|
||||
});
|
||||
}
|
||||
/// AggregatingTransform::expandPipeline expects single output port.
|
||||
|
@ -2,7 +2,6 @@
|
||||
#include <Compression/CompressedReadBuffer.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <Interpreters/Aggregator.h>
|
||||
#include <Processors/Chunk.h>
|
||||
#include <Processors/IAccumulatingTransform.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/setThreadName.h>
|
||||
@ -20,7 +19,7 @@ namespace CurrentMetrics
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class AggregatedChunkInfo : public ChunkInfoCloneable<AggregatedChunkInfo>
|
||||
class AggregatedChunkInfo : public ChunkInfo
|
||||
{
|
||||
public:
|
||||
bool is_overflows = false;
|
||||
|
@ -27,12 +27,18 @@ public:
|
||||
}
|
||||
|
||||
ExceptionKeepingTransform::work();
|
||||
if (finish_chunk)
|
||||
{
|
||||
data.chunk = std::move(finish_chunk);
|
||||
ready_output = true;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
void onConsume(Chunk chunk) override
|
||||
{
|
||||
cur_chunk = Squashing::squash(std::move(chunk));
|
||||
if (auto res_chunk = DB::Squashing::squash(std::move(chunk)))
|
||||
cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows());
|
||||
}
|
||||
|
||||
GenerateResult onGenerate() override
|
||||
@ -42,10 +48,16 @@ protected:
|
||||
res.is_done = true;
|
||||
return res;
|
||||
}
|
||||
void onFinish() override
|
||||
{
|
||||
auto chunk = DB::Squashing::squash({});
|
||||
finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows());
|
||||
}
|
||||
|
||||
private:
|
||||
Squashing squashing;
|
||||
Chunk cur_chunk;
|
||||
Chunk finish_chunk;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
#include <Processors/Transforms/CountingTransform.h>

#include <IO/Progress.h>
#include <Interpreters/ProcessList.h>
#include <Processors/Transforms/CountingTransform.h>
#include <Common/ProfileEvents.h>
#include <Common/ThreadStatus.h>

Some files were not shown because too many files have changed in this diff.