mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge branch 'ClickHouse:master' into hive_style_partitioning
This commit is contained in:
commit
e646713122
@ -40,8 +40,6 @@ Every month we get together with the community (users, contributors, customers,
|
|||||||
|
|
||||||
Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
|
Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
|
||||||
|
|
||||||
* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26
|
|
||||||
* [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27
|
|
||||||
* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9
|
* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9
|
||||||
* [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9
|
* [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9
|
||||||
* [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9
|
* [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9
|
||||||
|
@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING)
|
|||||||
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
|
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}")
|
message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}")
|
||||||
endif ()
|
endif ()
|
||||||
|
2
contrib/orc
vendored
2
contrib/orc
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f
|
Subproject commit bcc025c09828c556f54cfbdf83a66b9acae7d17f
|
2
contrib/s2geometry
vendored
2
contrib/s2geometry
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 0146e2d1355828f8f633cb050948250ad7406c57
|
Subproject commit 6522a40338d58752c2a4227a3fc2bc4107c73e43
|
@ -1,7 +1,6 @@
|
|||||||
option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES})
|
option(ENABLE_S2_GEOMETRY "Enable S2 Geometry" ${ENABLE_LIBRARIES})
|
||||||
|
|
||||||
# ARCH_S390X broke upstream, it can be re-enabled once https://github.com/google/s2geometry/pull/372 is merged
|
if (NOT ENABLE_S2_GEOMETRY)
|
||||||
if (NOT ENABLE_S2_GEOMETRY OR ARCH_S390X)
|
|
||||||
message(STATUS "Not using S2 Geometry")
|
message(STATUS "Not using S2 Geometry")
|
||||||
return()
|
return()
|
||||||
endif()
|
endif()
|
||||||
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
|||||||
# lts / testing / prestable / etc
|
# lts / testing / prestable / etc
|
||||||
ARG REPO_CHANNEL="stable"
|
ARG REPO_CHANNEL="stable"
|
||||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||||
ARG VERSION="24.6.1.4423"
|
ARG VERSION="24.6.2.17"
|
||||||
ARG PACKAGES="clickhouse-keeper"
|
ARG PACKAGES="clickhouse-keeper"
|
||||||
ARG DIRECT_DOWNLOAD_URLS=""
|
ARG DIRECT_DOWNLOAD_URLS=""
|
||||||
|
|
||||||
|
47
docker/reqgenerator.py
Normal file
47
docker/reqgenerator.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# To run this script you must install docker and the pipdeptree python package
|
||||||
|
#
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def build_docker_deps(image_name, imagedir):
    """Regenerate ``requirements.txt`` for a docker image.

    Runs ``image_name`` with ``/bin/bash`` as the entrypoint, installs
    ``pipdeptree`` inside the container and writes the frozen package list
    (spaces removed, sorted, de-duplicated) to ``{imagedir}/requirements.txt``.
    The output redirection is outside the quoted ``-c`` script, so the file is
    written by the shell that launches ``docker run``.

    Raises ``subprocess.CalledProcessError`` when the shell command exits with
    a non-zero status.
    """
    # Raw f-string: the sed expression contains ``\+``, which is an invalid
    # escape sequence in a regular string literal (SyntaxWarning on current
    # Python). The command text handed to the shell is unchanged.
    cmd = rf"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt"""
    # The pipeline and the redirection require a shell.
    subprocess.check_call(cmd, shell=True)
|
||||||
|
|
||||||
|
|
||||||
|
def check_docker_file_install_with_pip(filepath):
    """Scan a Dockerfile for its image name and for pip usage.

    The image name is the fifth space-separated token of a line containing
    ``docker build`` (for example ``# docker build -t <image> .``).

    Returns an ``(image_name, has_pip)`` tuple. ``has_pip`` is ``True`` as soon
    as a line containing ``pip3 install`` or ``pip install`` is found, and
    scanning stops there. ``image_name`` is ``None`` if no usable
    ``docker build`` line was seen up to that point.
    """
    image_name = None
    with open(filepath, "r") as f:
        for line in f:
            if "docker build" in line:
                arr = line.split(" ")
                if len(arr) > 4:
                    # When the image name is the last token on the line it
                    # still carries the line terminator; strip it so the name
                    # can be safely interpolated into a command later.
                    image_name = arr[4].strip()
            if "pip3 install" in line or "pip install" in line:
                return image_name, True
    return image_name, False
|
||||||
|
|
||||||
|
|
||||||
|
def process_affected_images(images_dir):
    """Regenerate requirements.txt for every pip-using image under a directory.

    Walks ``images_dir`` recursively. For each file named ``Dockerfile`` it
    calls ``check_docker_file_install_with_pip`` and, when pip usage is
    reported, calls ``build_docker_deps`` with the parsed image name and the
    Dockerfile's directory. Progress is reported with ``print``. Exceptions
    raised while building the dependency list are printed and do not stop the
    walk.
    """
    for root, _dirs, files in os.walk(images_dir):
        for f in files:
            if f != "Dockerfile":
                continue
            docker_file_path = os.path.join(root, f)
            print("Checking image on path", docker_file_path)
            image_name, has_pip = check_docker_file_install_with_pip(
                docker_file_path
            )
            if not has_pip:
                print("Pip not found in", docker_file_path)
                continue
            if not image_name:
                # Without an image name the docker command cannot work, and
                # its output redirection would still truncate an existing
                # requirements.txt, so skip this image.
                print("Cannot determine image name for", docker_file_path)
                continue
            print("Find pip in", image_name)
            try:
                build_docker_deps(image_name, root)
            except Exception as ex:
                # Best effort: report the failure and continue with the
                # remaining images.
                print(ex)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: process the images directory given as the first
# command-line argument.
process_affected_images(sys.argv[1])
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
|||||||
# lts / testing / prestable / etc
|
# lts / testing / prestable / etc
|
||||||
ARG REPO_CHANNEL="stable"
|
ARG REPO_CHANNEL="stable"
|
||||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||||
ARG VERSION="24.6.1.4423"
|
ARG VERSION="24.6.2.17"
|
||||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||||
ARG DIRECT_DOWNLOAD_URLS=""
|
ARG DIRECT_DOWNLOAD_URLS=""
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
|||||||
|
|
||||||
ARG REPO_CHANNEL="stable"
|
ARG REPO_CHANNEL="stable"
|
||||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||||
ARG VERSION="24.6.1.4423"
|
ARG VERSION="24.6.2.17"
|
||||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||||
|
|
||||||
#docker-official-library:off
|
#docker-official-library:off
|
||||||
|
@ -19,10 +19,7 @@ RUN apt-get update \
|
|||||||
odbcinst \
|
odbcinst \
|
||||||
psmisc \
|
psmisc \
|
||||||
python3 \
|
python3 \
|
||||||
python3-lxml \
|
|
||||||
python3-pip \
|
python3-pip \
|
||||||
python3-requests \
|
|
||||||
python3-termcolor \
|
|
||||||
unixodbc \
|
unixodbc \
|
||||||
pv \
|
pv \
|
||||||
jq \
|
jq \
|
||||||
@ -31,7 +28,8 @@ RUN apt-get update \
|
|||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||||
|
|
||||||
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3
|
COPY requirements.txt /
|
||||||
|
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||||
|
|
||||||
# This symlink is required by gcc to find the lld linker
|
# This symlink is required by gcc to find the lld linker
|
||||||
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
|
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
|
||||||
@ -39,6 +37,10 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
|
|||||||
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
|
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
|
||||||
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake
|
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake
|
||||||
|
|
||||||
|
# LLVM changed the paths for compiler-rt libraries. For some reason clang-18.1.8 cannot pick up the libraries from the default install path.
|
||||||
|
# This is a very dirty workaround; it would be better to build the compiler and LLVM ourselves and use that. Details: https://github.com/llvm/llvm-project/issues/95792
|
||||||
|
RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu
|
||||||
|
|
||||||
ARG CCACHE_VERSION=4.6.1
|
ARG CCACHE_VERSION=4.6.1
|
||||||
RUN mkdir /tmp/ccache \
|
RUN mkdir /tmp/ccache \
|
||||||
&& cd /tmp/ccache \
|
&& cd /tmp/ccache \
|
||||||
|
41
docker/test/fasttest/requirements.txt
Normal file
41
docker/test/fasttest/requirements.txt
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
Jinja2==3.1.3
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
PyJWT==2.3.0
|
||||||
|
PyYAML==6.0.1
|
||||||
|
Pygments==2.11.2
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
blinker==1.4
|
||||||
|
certifi==2020.6.20
|
||||||
|
chardet==4.0.0
|
||||||
|
cryptography==3.4.8
|
||||||
|
dbus-python==1.2.18
|
||||||
|
distro==1.7.0
|
||||||
|
httplib2==0.20.2
|
||||||
|
idna==3.3
|
||||||
|
importlib-metadata==4.6.4
|
||||||
|
jeepney==0.7.1
|
||||||
|
keyring==23.5.0
|
||||||
|
launchpadlib==1.10.16
|
||||||
|
lazr.restfulclient==0.14.4
|
||||||
|
lazr.uri==1.0.6
|
||||||
|
lxml==4.8.0
|
||||||
|
more-itertools==8.10.0
|
||||||
|
numpy==1.26.3
|
||||||
|
oauthlib==3.2.0
|
||||||
|
packaging==24.1
|
||||||
|
pandas==1.5.3
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
python-apt==2.4.0+ubuntu3
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
pytz==2024.1
|
||||||
|
requests==2.32.3
|
||||||
|
scipy==1.12.0
|
||||||
|
setuptools==59.6.0
|
||||||
|
six==1.16.0
|
||||||
|
termcolor==1.1.0
|
||||||
|
urllib3==1.26.5
|
||||||
|
wadllib==1.3.6
|
||||||
|
wheel==0.37.1
|
||||||
|
zipp==1.0.0
|
@ -84,6 +84,8 @@ function start_server
|
|||||||
echo "ClickHouse server pid '$server_pid' started and responded"
|
echo "ClickHouse server pid '$server_pid' started and responded"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export -f start_server
|
||||||
|
|
||||||
function clone_root
|
function clone_root
|
||||||
{
|
{
|
||||||
[ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE"
|
[ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE"
|
||||||
@ -254,6 +256,19 @@ function configure
|
|||||||
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Run a command under timeout(1), sending SIGTERM on expiry, and log a message
# when the command was killed by the timeout.
#   $1   - duration passed to timeout, e.g. 35m
#   $2.. - command to run and its arguments
# Returns the status reported by timeout: the command's own exit status, or
# 124 when the command timed out.
function timeout_with_logging() {
    local exit_code=0

    # --preserve-status is deliberately not used: with it, timeout reports the
    # command's own status even on expiry, so the 124 check below could never
    # detect a timeout.
    timeout -s TERM "${@}" || exit_code="${?}"

    if [[ "${exit_code}" -eq "124" ]]
    then
        echo "The command 'timeout ${*}' has been killed by timeout"
    fi

    return $exit_code
}
|
||||||
|
|
||||||
function run_tests
|
function run_tests
|
||||||
{
|
{
|
||||||
clickhouse-server --version
|
clickhouse-server --version
|
||||||
@ -292,6 +307,8 @@ function run_tests
|
|||||||
clickhouse stop --pid-path "$FASTTEST_DATA"
|
clickhouse stop --pid-path "$FASTTEST_DATA"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export -f run_tests
|
||||||
|
|
||||||
case "$stage" in
|
case "$stage" in
|
||||||
"")
|
"")
|
||||||
ls -la
|
ls -la
|
||||||
@ -315,7 +332,7 @@ case "$stage" in
|
|||||||
configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
|
configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
|
||||||
;&
|
;&
|
||||||
"run_tests")
|
"run_tests")
|
||||||
run_tests
|
timeout_with_logging 35m bash -c run_tests ||:
|
||||||
/process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \
|
/process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \
|
||||||
--out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \
|
--out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \
|
||||||
--out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv"
|
--out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv"
|
||||||
|
@ -31,7 +31,8 @@ RUN apt-get update \
|
|||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||||
|
|
||||||
RUN pip3 install Jinja2
|
COPY requirements.txt /
|
||||||
|
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||||
|
|
||||||
COPY * /
|
COPY * /
|
||||||
|
|
||||||
|
27
docker/test/fuzzer/requirements.txt
Normal file
27
docker/test/fuzzer/requirements.txt
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
blinker==1.4
|
||||||
|
cryptography==3.4.8
|
||||||
|
dbus-python==1.2.18
|
||||||
|
distro==1.7.0
|
||||||
|
httplib2==0.20.2
|
||||||
|
importlib-metadata==4.6.4
|
||||||
|
jeepney==0.7.1
|
||||||
|
Jinja2==3.1.4
|
||||||
|
keyring==23.5.0
|
||||||
|
launchpadlib==1.10.16
|
||||||
|
lazr.restfulclient==0.14.4
|
||||||
|
lazr.uri==1.0.6
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
more-itertools==8.10.0
|
||||||
|
oauthlib==3.2.0
|
||||||
|
packaging==24.1
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
PyJWT==2.3.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
python-apt==2.4.0+ubuntu3
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
setuptools==59.6.0
|
||||||
|
six==1.16.0
|
||||||
|
wadllib==1.3.6
|
||||||
|
wheel==0.37.1
|
||||||
|
zipp==1.0.0
|
@ -33,7 +33,8 @@ RUN apt-get update \
|
|||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||||
|
|
||||||
RUN pip3 install pycurl
|
COPY requirements.txt /
|
||||||
|
RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip
|
||||||
|
|
||||||
# Architecture of the image when BuildKit/buildx is used
|
# Architecture of the image when BuildKit/buildx is used
|
||||||
ARG TARGETARCH
|
ARG TARGETARCH
|
||||||
|
26
docker/test/integration/base/requirements.txt
Normal file
26
docker/test/integration/base/requirements.txt
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
blinker==1.4
|
||||||
|
cryptography==3.4.8
|
||||||
|
dbus-python==1.2.18
|
||||||
|
distro==1.7.0
|
||||||
|
httplib2==0.20.2
|
||||||
|
importlib-metadata==4.6.4
|
||||||
|
jeepney==0.7.1
|
||||||
|
keyring==23.5.0
|
||||||
|
launchpadlib==1.10.16
|
||||||
|
lazr.restfulclient==0.14.4
|
||||||
|
lazr.uri==1.0.6
|
||||||
|
more-itertools==8.10.0
|
||||||
|
oauthlib==3.2.0
|
||||||
|
packaging==24.1
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
pycurl==7.45.3
|
||||||
|
PyJWT==2.3.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
python-apt==2.4.0+ubuntu3
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
setuptools==59.6.0
|
||||||
|
six==1.16.0
|
||||||
|
wadllib==1.3.6
|
||||||
|
wheel==0.37.1
|
||||||
|
zipp==1.0.0
|
@ -2,4 +2,5 @@
|
|||||||
# Helper docker container to run python bottle apps
|
# Helper docker container to run python bottle apps
|
||||||
|
|
||||||
FROM python:3
|
FROM python:3
|
||||||
RUN python -m pip install bottle
|
COPY requirements.txt /
|
||||||
|
RUN python -m pip install --no-cache-dir -r requirements.txt
|
||||||
|
6
docker/test/integration/resolver/requirements.txt
Normal file
6
docker/test/integration/resolver/requirements.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
bottle==0.12.25
|
||||||
|
packaging==24.1
|
||||||
|
pip==23.2.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
setuptools==69.0.3
|
||||||
|
wheel==0.42.0
|
@ -26,7 +26,6 @@ RUN apt-get update \
|
|||||||
libicu-dev \
|
libicu-dev \
|
||||||
bsdutils \
|
bsdutils \
|
||||||
curl \
|
curl \
|
||||||
python3-pika \
|
|
||||||
liblua5.1-dev \
|
liblua5.1-dev \
|
||||||
luajit \
|
luajit \
|
||||||
libssl-dev \
|
libssl-dev \
|
||||||
@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
|
|||||||
|
|
||||||
# kazoo 2.10.0 is broken
|
# kazoo 2.10.0 is broken
|
||||||
# https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html
|
# https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html
|
||||||
RUN python3 -m pip install --no-cache-dir \
|
COPY requirements.txt /
|
||||||
PyMySQL==1.1.0 \
|
RUN python3 -m pip install --no-cache-dir -r requirements.txt
|
||||||
asyncio==3.4.3 \
|
|
||||||
avro==1.10.2 \
|
|
||||||
azure-storage-blob==12.19.0 \
|
|
||||||
boto3==1.34.24 \
|
|
||||||
cassandra-driver==3.29.0 \
|
|
||||||
confluent-kafka==2.3.0 \
|
|
||||||
delta-spark==2.3.0 \
|
|
||||||
dict2xml==1.7.4 \
|
|
||||||
dicttoxml==1.7.16 \
|
|
||||||
docker==6.1.3 \
|
|
||||||
docker-compose==1.29.2 \
|
|
||||||
grpcio==1.60.0 \
|
|
||||||
grpcio-tools==1.60.0 \
|
|
||||||
kafka-python==2.0.2 \
|
|
||||||
lz4==4.3.3 \
|
|
||||||
minio==7.2.3 \
|
|
||||||
nats-py==2.6.0 \
|
|
||||||
protobuf==4.25.2 \
|
|
||||||
kazoo==2.9.0 \
|
|
||||||
psycopg2-binary==2.9.6 \
|
|
||||||
pyhdfs==0.3.1 \
|
|
||||||
pymongo==3.11.0 \
|
|
||||||
pyspark==3.3.2 \
|
|
||||||
pytest==7.4.4 \
|
|
||||||
pytest-order==1.0.0 \
|
|
||||||
pytest-random==0.2 \
|
|
||||||
pytest-repeat==0.9.3 \
|
|
||||||
pytest-timeout==2.2.0 \
|
|
||||||
pytest-xdist==3.5.0 \
|
|
||||||
pytest-reportlog==0.4.0 \
|
|
||||||
pytz==2023.3.post1 \
|
|
||||||
pyyaml==5.3.1 \
|
|
||||||
redis==5.0.1 \
|
|
||||||
requests-kerberos==0.14.0 \
|
|
||||||
tzlocal==2.1 \
|
|
||||||
retry==0.9.2 \
|
|
||||||
bs4==0.0.2 \
|
|
||||||
lxml==5.1.0 \
|
|
||||||
urllib3==2.0.7 \
|
|
||||||
jwcrypto==1.5.6
|
|
||||||
# bs4, lxml are for cloud tests, do not delete
|
|
||||||
|
|
||||||
# Hudi supports only spark 3.3.*, not 3.4
|
# Hudi supports only spark 3.3.*, not 3.4
|
||||||
RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \
|
RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \
|
||||||
|
113
docker/test/integration/runner/requirements.txt
Normal file
113
docker/test/integration/runner/requirements.txt
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
PyHDFS==0.3.1
|
||||||
|
PyJWT==2.3.0
|
||||||
|
PyMySQL==1.1.0
|
||||||
|
PyNaCl==1.5.0
|
||||||
|
PyYAML==5.3.1
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
argon2-cffi-bindings==21.2.0
|
||||||
|
argon2-cffi==23.1.0
|
||||||
|
async-timeout==4.0.3
|
||||||
|
asyncio==3.4.3
|
||||||
|
attrs==23.2.0
|
||||||
|
avro==1.10.2
|
||||||
|
azure-core==1.30.1
|
||||||
|
azure-storage-blob==12.19.0
|
||||||
|
bcrypt==4.1.3
|
||||||
|
beautifulsoup4==4.12.3
|
||||||
|
blinker==1.4
|
||||||
|
boto3==1.34.24
|
||||||
|
botocore==1.34.101
|
||||||
|
bs4==0.0.2
|
||||||
|
cassandra-driver==3.29.0
|
||||||
|
certifi==2024.2.2
|
||||||
|
cffi==1.16.0
|
||||||
|
charset-normalizer==3.3.2
|
||||||
|
click==8.1.7
|
||||||
|
confluent-kafka==2.3.0
|
||||||
|
cryptography==3.4.8
|
||||||
|
dbus-python==1.2.18
|
||||||
|
decorator==5.1.1
|
||||||
|
delta-spark==2.3.0
|
||||||
|
dict2xml==1.7.4
|
||||||
|
dicttoxml==1.7.16
|
||||||
|
distro-info==1.1+ubuntu0.2
|
||||||
|
distro==1.7.0
|
||||||
|
docker-compose==1.29.2
|
||||||
|
docker==6.1.3
|
||||||
|
dockerpty==0.4.1
|
||||||
|
docopt==0.6.2
|
||||||
|
exceptiongroup==1.2.1
|
||||||
|
execnet==2.1.1
|
||||||
|
geomet==0.2.1.post1
|
||||||
|
grpcio-tools==1.60.0
|
||||||
|
grpcio==1.60.0
|
||||||
|
gssapi==1.8.3
|
||||||
|
httplib2==0.20.2
|
||||||
|
idna==3.7
|
||||||
|
importlib-metadata==4.6.4
|
||||||
|
iniconfig==2.0.0
|
||||||
|
isodate==0.6.1
|
||||||
|
jeepney==0.7.1
|
||||||
|
jmespath==1.0.1
|
||||||
|
jsonschema==3.2.0
|
||||||
|
jwcrypto==1.5.6
|
||||||
|
kafka-python==2.0.2
|
||||||
|
kazoo==2.9.0
|
||||||
|
keyring==23.5.0
|
||||||
|
krb5==0.5.1
|
||||||
|
launchpadlib==1.10.16
|
||||||
|
lazr.restfulclient==0.14.4
|
||||||
|
lazr.uri==1.0.6
|
||||||
|
lxml==5.1.0
|
||||||
|
lz4==4.3.3
|
||||||
|
minio==7.2.3
|
||||||
|
more-itertools==8.10.0
|
||||||
|
nats-py==2.6.0
|
||||||
|
oauthlib==3.2.0
|
||||||
|
packaging==24.0
|
||||||
|
paramiko==3.4.0
|
||||||
|
pika==1.2.0
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
pluggy==1.5.0
|
||||||
|
protobuf==4.25.2
|
||||||
|
psycopg2-binary==2.9.6
|
||||||
|
py4j==0.10.9.5
|
||||||
|
py==1.11.0
|
||||||
|
pycparser==2.22
|
||||||
|
pycryptodome==3.20.0
|
||||||
|
pymongo==3.11.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
pyrsistent==0.20.0
|
||||||
|
pyspark==3.3.2
|
||||||
|
pyspnego==0.10.2
|
||||||
|
pytest-order==1.0.0
|
||||||
|
pytest-random==0.2
|
||||||
|
pytest-repeat==0.9.3
|
||||||
|
pytest-reportlog==0.4.0
|
||||||
|
pytest-timeout==2.2.0
|
||||||
|
pytest-xdist==3.5.0
|
||||||
|
pytest==7.4.4
|
||||||
|
python-apt==2.4.0+ubuntu3
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
python-dotenv==0.21.1
|
||||||
|
pytz==2023.3.post1
|
||||||
|
redis==5.0.1
|
||||||
|
requests-kerberos==0.14.0
|
||||||
|
requests==2.31.0
|
||||||
|
retry==0.9.2
|
||||||
|
s3transfer==0.10.1
|
||||||
|
setuptools==59.6.0
|
||||||
|
simplejson==3.19.2
|
||||||
|
six==1.16.0
|
||||||
|
soupsieve==2.5
|
||||||
|
texttable==1.7.0
|
||||||
|
tomli==2.0.1
|
||||||
|
typing_extensions==4.11.0
|
||||||
|
tzlocal==2.1
|
||||||
|
unattended-upgrades==0.1
|
||||||
|
urllib3==2.0.7
|
||||||
|
wadllib==1.3.6
|
||||||
|
websocket-client==0.59.0
|
||||||
|
wheel==0.37.1
|
||||||
|
zipp==1.0.0
|
@ -1,3 +1,4 @@
|
|||||||
|
# docker build -t clickhouse/libfuzzer .
|
||||||
ARG FROM_TAG=latest
|
ARG FROM_TAG=latest
|
||||||
FROM clickhouse/test-base:$FROM_TAG
|
FROM clickhouse/test-base:$FROM_TAG
|
||||||
|
|
||||||
@ -29,7 +30,8 @@ RUN apt-get update \
|
|||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||||
|
|
||||||
RUN pip3 install Jinja2
|
COPY requirements.txt /
|
||||||
|
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||||
|
|
||||||
COPY * /
|
COPY * /
|
||||||
|
|
||||||
|
27
docker/test/libfuzzer/requirements.txt
Normal file
27
docker/test/libfuzzer/requirements.txt
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
blinker==1.4
|
||||||
|
cryptography==3.4.8
|
||||||
|
dbus-python==1.2.18
|
||||||
|
distro==1.7.0
|
||||||
|
httplib2==0.20.2
|
||||||
|
importlib-metadata==4.6.4
|
||||||
|
jeepney==0.7.1
|
||||||
|
Jinja2==3.1.4
|
||||||
|
keyring==23.5.0
|
||||||
|
launchpadlib==1.10.16
|
||||||
|
lazr.restfulclient==0.14.4
|
||||||
|
lazr.uri==1.0.6
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
more-itertools==8.10.0
|
||||||
|
oauthlib==3.2.0
|
||||||
|
packaging==24.1
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
PyJWT==2.3.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
python-apt==2.4.0+ubuntu3
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
setuptools==59.6.0
|
||||||
|
six==1.16.0
|
||||||
|
wadllib==1.3.6
|
||||||
|
wheel==0.37.1
|
||||||
|
zipp==1.0.0
|
@ -23,7 +23,6 @@ RUN apt-get update \
|
|||||||
python3 \
|
python3 \
|
||||||
python3-dev \
|
python3-dev \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
python3-setuptools \
|
|
||||||
rsync \
|
rsync \
|
||||||
tree \
|
tree \
|
||||||
tzdata \
|
tzdata \
|
||||||
@ -33,12 +32,14 @@ RUN apt-get update \
|
|||||||
cargo \
|
cargo \
|
||||||
ripgrep \
|
ripgrep \
|
||||||
zstd \
|
zstd \
|
||||||
&& pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \
|
|
||||||
&& apt-get purge --yes python3-dev g++ \
|
&& apt-get purge --yes python3-dev g++ \
|
||||||
&& apt-get autoremove --yes \
|
&& apt-get autoremove --yes \
|
||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||||
|
|
||||||
|
COPY requirements.txt /
|
||||||
|
RUN pip3 --no-cache-dir install -r requirements.txt
|
||||||
|
|
||||||
COPY run.sh /
|
COPY run.sh /
|
||||||
|
|
||||||
CMD ["bash", "/run.sh"]
|
CMD ["bash", "/run.sh"]
|
||||||
|
32
docker/test/performance-comparison/requirements.txt
Normal file
32
docker/test/performance-comparison/requirements.txt
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
blinker==1.4
|
||||||
|
clickhouse-driver==0.2.7
|
||||||
|
cryptography==3.4.8
|
||||||
|
dbus-python==1.2.18
|
||||||
|
distro==1.7.0
|
||||||
|
httplib2==0.20.2
|
||||||
|
importlib-metadata==4.6.4
|
||||||
|
jeepney==0.7.1
|
||||||
|
keyring==23.5.0
|
||||||
|
launchpadlib==1.10.16
|
||||||
|
lazr.restfulclient==0.14.4
|
||||||
|
lazr.uri==1.0.6
|
||||||
|
more-itertools==8.10.0
|
||||||
|
numpy==1.26.3
|
||||||
|
oauthlib==3.2.0
|
||||||
|
packaging==24.1
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
Pygments==2.11.2
|
||||||
|
PyJWT==2.3.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
python-apt==2.4.0+ubuntu3
|
||||||
|
pytz==2023.4
|
||||||
|
PyYAML==6.0.1
|
||||||
|
scipy==1.12.0
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
setuptools==59.6.0
|
||||||
|
six==1.16.0
|
||||||
|
tzlocal==2.1
|
||||||
|
wadllib==1.3.6
|
||||||
|
wheel==0.37.1
|
||||||
|
zipp==1.0.0
|
@ -18,11 +18,8 @@ RUN apt-get update --yes \
|
|||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||||
|
|
||||||
RUN pip3 install \
|
COPY requirements.txt /
|
||||||
numpy \
|
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||||
pyodbc \
|
|
||||||
deepdiff \
|
|
||||||
sqlglot
|
|
||||||
|
|
||||||
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz"
|
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz"
|
||||||
|
|
||||||
|
30
docker/test/sqllogic/requirements.txt
Normal file
30
docker/test/sqllogic/requirements.txt
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
blinker==1.4
|
||||||
|
cryptography==3.4.8
|
||||||
|
dbus-python==1.2.18
|
||||||
|
deepdiff==7.0.1
|
||||||
|
distro==1.7.0
|
||||||
|
httplib2==0.20.2
|
||||||
|
importlib-metadata==4.6.4
|
||||||
|
jeepney==0.7.1
|
||||||
|
keyring==23.5.0
|
||||||
|
launchpadlib==1.10.16
|
||||||
|
lazr.restfulclient==0.14.4
|
||||||
|
lazr.uri==1.0.6
|
||||||
|
more-itertools==8.10.0
|
||||||
|
numpy==1.26.4
|
||||||
|
oauthlib==3.2.0
|
||||||
|
ordered-set==4.1.0
|
||||||
|
packaging==24.1
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
PyJWT==2.3.0
|
||||||
|
pyodbc==5.1.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
python-apt==2.4.0+ubuntu3
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
setuptools==59.6.0
|
||||||
|
six==1.16.0
|
||||||
|
sqlglot==23.16.0
|
||||||
|
wadllib==1.3.6
|
||||||
|
wheel==0.37.1
|
||||||
|
zipp==1.0.0
|
@ -14,9 +14,8 @@ RUN apt-get update --yes \
|
|||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||||
|
|
||||||
RUN pip3 install \
|
COPY requirements.txt /
|
||||||
pyyaml \
|
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||||
clickhouse-driver
|
|
||||||
|
|
||||||
ARG sqltest_repo="https://github.com/elliotchance/sqltest/"
|
ARG sqltest_repo="https://github.com/elliotchance/sqltest/"
|
||||||
|
|
||||||
|
29
docker/test/sqltest/requirements.txt
Normal file
29
docker/test/sqltest/requirements.txt
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
blinker==1.4
|
||||||
|
clickhouse-driver==0.2.7
|
||||||
|
cryptography==3.4.8
|
||||||
|
dbus-python==1.2.18
|
||||||
|
distro==1.7.0
|
||||||
|
httplib2==0.20.2
|
||||||
|
importlib-metadata==4.6.4
|
||||||
|
jeepney==0.7.1
|
||||||
|
keyring==23.5.0
|
||||||
|
launchpadlib==1.10.16
|
||||||
|
lazr.restfulclient==0.14.4
|
||||||
|
lazr.uri==1.0.6
|
||||||
|
more-itertools==8.10.0
|
||||||
|
oauthlib==3.2.0
|
||||||
|
packaging==24.1
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
PyJWT==2.3.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
python-apt==2.4.0+ubuntu3
|
||||||
|
pytz==2024.1
|
||||||
|
PyYAML==6.0.1
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
setuptools==59.6.0
|
||||||
|
six==1.16.0
|
||||||
|
tzlocal==5.2
|
||||||
|
wadllib==1.3.6
|
||||||
|
wheel==0.37.1
|
||||||
|
zipp==1.0.0
|
@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG
|
|||||||
RUN apt-get update -y \
|
RUN apt-get update -y \
|
||||||
&& env DEBIAN_FRONTEND=noninteractive \
|
&& env DEBIAN_FRONTEND=noninteractive \
|
||||||
apt-get install --yes --no-install-recommends \
|
apt-get install --yes --no-install-recommends \
|
||||||
python3-requests \
|
|
||||||
nodejs \
|
nodejs \
|
||||||
npm \
|
npm \
|
||||||
&& apt-get clean \
|
&& apt-get clean \
|
||||||
|
@ -25,10 +25,7 @@ RUN apt-get update -y \
|
|||||||
openssl \
|
openssl \
|
||||||
postgresql-client \
|
postgresql-client \
|
||||||
python3 \
|
python3 \
|
||||||
python3-lxml \
|
|
||||||
python3-pip \
|
python3-pip \
|
||||||
python3-requests \
|
|
||||||
python3-termcolor \
|
|
||||||
qemu-user-static \
|
qemu-user-static \
|
||||||
sqlite3 \
|
sqlite3 \
|
||||||
sudo \
|
sudo \
|
||||||
@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR
|
|||||||
&& unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \
|
&& unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \
|
||||||
&& rm protoc-${PROTOC_VERSION}-linux-x86_64.zip
|
&& rm protoc-${PROTOC_VERSION}-linux-x86_64.zip
|
||||||
|
|
||||||
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0
|
COPY requirements.txt /
|
||||||
|
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||||
|
|
||||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||||
&& cd /tmp/clickhouse-odbc-tmp \
|
&& cd /tmp/clickhouse-odbc-tmp \
|
||||||
|
51
docker/test/stateless/requirements.txt
Normal file
51
docker/test/stateless/requirements.txt
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
awscli==1.22.34
|
||||||
|
blinker==1.4
|
||||||
|
botocore==1.23.34
|
||||||
|
certifi==2020.6.20
|
||||||
|
chardet==4.0.0
|
||||||
|
colorama==0.4.4
|
||||||
|
cryptography==3.4.8
|
||||||
|
dbus-python==1.2.18
|
||||||
|
distro==1.7.0
|
||||||
|
docutils==0.17.1
|
||||||
|
gyp==0.1
|
||||||
|
httplib2==0.20.2
|
||||||
|
idna==3.3
|
||||||
|
importlib-metadata==4.6.4
|
||||||
|
jeepney==0.7.1
|
||||||
|
Jinja2==3.1.3
|
||||||
|
jmespath==0.10.0
|
||||||
|
keyring==23.5.0
|
||||||
|
launchpadlib==1.10.16
|
||||||
|
lazr.restfulclient==0.14.4
|
||||||
|
lazr.uri==1.0.6
|
||||||
|
lxml==4.8.0
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
more-itertools==8.10.0
|
||||||
|
numpy==1.26.3
|
||||||
|
oauthlib==3.2.0
|
||||||
|
packaging==24.1
|
||||||
|
pandas==1.5.3
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
pyarrow==15.0.0
|
||||||
|
pyasn1==0.4.8
|
||||||
|
PyJWT==2.3.0
|
||||||
|
pyparsing==2.4.7
|
||||||
|
python-apt==2.4.0+ubuntu3
|
||||||
|
python-dateutil==2.8.1
|
||||||
|
pytz==2024.1
|
||||||
|
PyYAML==6.0.1
|
||||||
|
requests==2.32.3
|
||||||
|
roman==3.3
|
||||||
|
rsa==4.8
|
||||||
|
s3transfer==0.5.0
|
||||||
|
scipy==1.12.0
|
||||||
|
SecretStorage==3.3.1
|
||||||
|
setuptools==59.6.0
|
||||||
|
six==1.16.0
|
||||||
|
termcolor==1.1.0
|
||||||
|
urllib3==1.26.5
|
||||||
|
wadllib==1.3.6
|
||||||
|
wheel==0.37.1
|
||||||
|
zipp==1.0.0
|
@ -6,6 +6,9 @@ source /setup_export_logs.sh
|
|||||||
# fail on errors, verbose and export all env variables
|
# fail on errors, verbose and export all env variables
|
||||||
set -e -x -a
|
set -e -x -a
|
||||||
|
|
||||||
|
MAX_RUN_TIME=${MAX_RUN_TIME:-10800}
|
||||||
|
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME))
|
||||||
|
|
||||||
# Choose random timezone for this test run.
|
# Choose random timezone for this test run.
|
||||||
#
|
#
|
||||||
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
|
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
|
||||||
@ -262,14 +265,17 @@ function run_tests()
|
|||||||
|
|
||||||
export -f run_tests
|
export -f run_tests
|
||||||
|
|
||||||
|
|
||||||
|
# This should be enough to setup job and collect artifacts
|
||||||
|
TIMEOUT=$((MAX_RUN_TIME - 300))
|
||||||
if [ "$NUM_TRIES" -gt "1" ]; then
|
if [ "$NUM_TRIES" -gt "1" ]; then
|
||||||
# We don't run tests with Ordinary database in PRs, only in master.
|
# We don't run tests with Ordinary database in PRs, only in master.
|
||||||
# So run new/changed tests with Ordinary at least once in flaky check.
|
# So run new/changed tests with Ordinary at least once in flaky check.
|
||||||
timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
|
timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
|
||||||
| sed 's/All tests have finished//' | sed 's/No tests were run//' ||:
|
| sed 's/All tests have finished//' | sed 's/No tests were run//' ||:
|
||||||
fi
|
fi
|
||||||
|
|
||||||
timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||:
|
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
|
||||||
|
|
||||||
echo "Files in current directory"
|
echo "Files in current directory"
|
||||||
ls -la ./
|
ls -la ./
|
||||||
|
@ -38,7 +38,7 @@ function fn_exists() {
|
|||||||
function timeout_with_logging() {
|
function timeout_with_logging() {
|
||||||
local exit_code=0
|
local exit_code=0
|
||||||
|
|
||||||
timeout "${@}" || exit_code="${?}"
|
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
|
||||||
|
|
||||||
if [[ "${exit_code}" -eq "124" ]]
|
if [[ "${exit_code}" -eq "124" ]]
|
||||||
then
|
then
|
||||||
|
@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
|||||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||||
|
|
||||||
# python-magic is the same version as in Ubuntu 22.04
|
# python-magic is the same version as in Ubuntu 22.04
|
||||||
RUN pip3 install \
|
COPY requirements.txt /
|
||||||
PyGithub \
|
RUN pip3 install --no-cache-dir -r requirements.txt
|
||||||
black==23.12.0 \
|
|
||||||
boto3 \
|
|
||||||
codespell==2.2.1 \
|
|
||||||
mypy==1.8.0 \
|
|
||||||
pylint==3.1.0 \
|
|
||||||
python-magic==0.4.24 \
|
|
||||||
flake8==4.0.1 \
|
|
||||||
requests \
|
|
||||||
thefuzz \
|
|
||||||
tqdm==4.66.4 \
|
|
||||||
types-requests \
|
|
||||||
unidiff \
|
|
||||||
jwt \
|
|
||||||
&& rm -rf /root/.cache/pip
|
|
||||||
|
|
||||||
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
|
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
|
||||||
ENV LC_ALL en_US.UTF-8
|
ENV LC_ALL en_US.UTF-8
|
||||||
|
58
docker/test/style/requirements.txt
Normal file
58
docker/test/style/requirements.txt
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
aiohttp==3.9.5
|
||||||
|
aiosignal==1.3.1
|
||||||
|
astroid==3.1.0
|
||||||
|
async-timeout==4.0.3
|
||||||
|
attrs==23.2.0
|
||||||
|
black==23.12.0
|
||||||
|
boto3==1.34.131
|
||||||
|
botocore==1.34.131
|
||||||
|
certifi==2024.6.2
|
||||||
|
cffi==1.16.0
|
||||||
|
charset-normalizer==3.3.2
|
||||||
|
click==8.1.7
|
||||||
|
codespell==2.2.1
|
||||||
|
cryptography==42.0.8
|
||||||
|
Deprecated==1.2.14
|
||||||
|
dill==0.3.8
|
||||||
|
flake8==4.0.1
|
||||||
|
frozenlist==1.4.1
|
||||||
|
idna==3.7
|
||||||
|
isort==5.13.2
|
||||||
|
jmespath==1.0.1
|
||||||
|
jwt==1.3.1
|
||||||
|
mccabe==0.6.1
|
||||||
|
multidict==6.0.5
|
||||||
|
mypy==1.8.0
|
||||||
|
mypy-extensions==1.0.0
|
||||||
|
packaging==24.1
|
||||||
|
pathspec==0.9.0
|
||||||
|
pip==24.1.1
|
||||||
|
pipdeptree==2.23.0
|
||||||
|
platformdirs==4.2.2
|
||||||
|
pycodestyle==2.8.0
|
||||||
|
pycparser==2.22
|
||||||
|
pyflakes==2.4.0
|
||||||
|
PyGithub==2.3.0
|
||||||
|
PyJWT==2.8.0
|
||||||
|
pylint==3.1.0
|
||||||
|
PyNaCl==1.5.0
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
python-magic==0.4.24
|
||||||
|
PyYAML==6.0.1
|
||||||
|
rapidfuzz==3.9.3
|
||||||
|
requests==2.32.3
|
||||||
|
s3transfer==0.10.1
|
||||||
|
setuptools==59.6.0
|
||||||
|
six==1.16.0
|
||||||
|
thefuzz==0.22.1
|
||||||
|
tomli==2.0.1
|
||||||
|
tomlkit==0.12.5
|
||||||
|
tqdm==4.66.4
|
||||||
|
types-requests==2.32.0.20240622
|
||||||
|
typing_extensions==4.12.2
|
||||||
|
unidiff==0.7.5
|
||||||
|
urllib3==2.2.2
|
||||||
|
wheel==0.37.1
|
||||||
|
wrapt==1.16.0
|
||||||
|
yamllint==1.26.3
|
||||||
|
yarl==1.9.4
|
26
docs/changelogs/v24.6.2.17-stable.md
Normal file
26
docs/changelogs/v24.6.2.17-stable.md
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
---
|
||||||
|
sidebar_position: 1
|
||||||
|
sidebar_label: 2024
|
||||||
|
---
|
||||||
|
|
||||||
|
# 2024 Changelog
|
||||||
|
|
||||||
|
### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable (dcced7c8478)
|
||||||
|
|
||||||
|
#### New Feature
|
||||||
|
* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||||
|
|
||||||
|
#### Improvement
|
||||||
|
* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||||
|
|
||||||
|
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||||
|
* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||||
|
|
||||||
|
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||||
|
|
||||||
|
* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||||
|
* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||||
|
* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||||
|
* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||||
|
* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||||
|
|
@ -84,6 +84,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
|
|||||||
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
|
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
|
||||||
- `password` for the file on disk
|
- `password` for the file on disk
|
||||||
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
|
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
|
||||||
|
- `use_same_s3_credentials_for_base_backup`: whether base backup to S3 should inherit credentials from the query. Only works with `S3`.
|
||||||
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
|
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
|
||||||
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
|
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
|
||||||
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
|
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
|
||||||
|
@ -974,6 +974,13 @@ Default value: false
|
|||||||
|
|
||||||
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
|
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
|
||||||
|
|
||||||
|
## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization}
|
||||||
|
|
||||||
|
Enables compact mode for binary serialization of discriminators in Variant data type.
|
||||||
|
This mode allows using significantly less memory for storing discriminators in parts when there is mostly one variant or a lot of NULL values.
|
||||||
|
|
||||||
|
Default value: true
|
||||||
|
|
||||||
## merge_workload
|
## merge_workload
|
||||||
|
|
||||||
Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead.
|
Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead.
|
||||||
|
@ -0,0 +1,37 @@
|
|||||||
|
---
|
||||||
|
slug: /en/sql-reference/aggregate-functions/reference/aggthrow
|
||||||
|
sidebar_position: 101
|
||||||
|
---
|
||||||
|
|
||||||
|
# aggThrow
|
||||||
|
|
||||||
|
This function can be used for the purpose of testing exception safety. It will throw an exception on creation with the specified probability.
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
aggThrow(throw_prob)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Arguments**
|
||||||
|
|
||||||
|
- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md).
|
||||||
|
|
||||||
|
**Returned value**
|
||||||
|
|
||||||
|
- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`.
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
Query:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even;
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
```response
|
||||||
|
Received exception:
|
||||||
|
Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW)
|
||||||
|
```
|
@ -43,6 +43,7 @@ Standard aggregate functions:
|
|||||||
|
|
||||||
ClickHouse-specific aggregate functions:
|
ClickHouse-specific aggregate functions:
|
||||||
|
|
||||||
|
- [aggThrow](../reference/aggthrow.md)
|
||||||
- [analysisOfVariance](../reference/analysis_of_variance.md)
|
- [analysisOfVariance](../reference/analysis_of_variance.md)
|
||||||
- [any](../reference/any_respect_nulls.md)
|
- [any](../reference/any_respect_nulls.md)
|
||||||
- [anyHeavy](../reference/anyheavy.md)
|
- [anyHeavy](../reference/anyheavy.md)
|
||||||
|
@ -83,7 +83,57 @@ Result:
|
|||||||
```
|
```
|
||||||
## makeDate32
|
## makeDate32
|
||||||
|
|
||||||
Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md).
|
Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, month, day (or optionally a year and a day).
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
makeDate32(year, [month,] day)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Arguments**
|
||||||
|
|
||||||
|
- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||||
|
- `month` — Month (optional). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||||
|
- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||||
|
|
||||||
|
:::note
|
||||||
|
If `month` is omitted then `day` should take a value between `1` and `365`, otherwise it should take a value between `1` and `31`.
|
||||||
|
:::
|
||||||
|
|
||||||
|
**Returned values**
|
||||||
|
|
||||||
|
- A date created from the arguments. [Date32](../../sql-reference/data-types/date32.md).
|
||||||
|
|
||||||
|
**Examples**
|
||||||
|
|
||||||
|
Create a date from a year, month, and day:
|
||||||
|
|
||||||
|
Query:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT makeDate32(2024, 1, 1);
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
```response
|
||||||
|
2024-01-01
|
||||||
|
```
|
||||||
|
|
||||||
|
Create a date from a year and day of year:
|
||||||
|
|
||||||
|
Query:
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
SELECT makeDate32(2024, 100);
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
```response
|
||||||
|
2024-04-09
|
||||||
|
```
|
||||||
|
|
||||||
## makeDateTime
|
## makeDateTime
|
||||||
|
|
||||||
@ -125,12 +175,38 @@ Result:
|
|||||||
|
|
||||||
## makeDateTime64
|
## makeDateTime64
|
||||||
|
|
||||||
Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md).
|
Creates a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components: year, month, day, hour, minute, second, with optional sub-second precision.
|
||||||
|
|
||||||
**Syntax**
|
**Syntax**
|
||||||
|
|
||||||
|
```sql
|
||||||
|
makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
|
||||||
|
```
|
||||||
|
|
||||||
|
**Arguments**
|
||||||
|
|
||||||
|
- `year` — Year (0-9999). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||||
|
- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||||
|
- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||||
|
- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||||
|
- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||||
|
- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||||
|
- `precision` — Optional precision of the sub-second component (0-9). [Integer](../../sql-reference/data-types/int-uint.md).
|
||||||
|
|
||||||
|
**Returned value**
|
||||||
|
|
||||||
|
- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md).
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
|
SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5);
|
||||||
|
```
|
||||||
|
|
||||||
|
```response
|
||||||
|
┌─makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5)─┐
|
||||||
|
│ 2023-05-15 10:30:45.00779 │
|
||||||
|
└─────────────────────────────────────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
## timestamp
|
## timestamp
|
||||||
|
@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server.
|
|||||||
fqdn();
|
fqdn();
|
||||||
```
|
```
|
||||||
|
|
||||||
This function is case-insensitive.
|
Aliases: `fullHostName`, `FQDN`.
|
||||||
|
|
||||||
**Returned value**
|
**Returned value**
|
||||||
|
|
||||||
|
@ -6,41 +6,119 @@ sidebar_label: Time Window
|
|||||||
|
|
||||||
# Time Window Functions
|
# Time Window Functions
|
||||||
|
|
||||||
Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below:
|
Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with [WindowView](../statements/create/view.md/#window-view-experimental) are listed below:
|
||||||
|
|
||||||
## tumble
|
## tumble
|
||||||
|
|
||||||
A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`).
|
A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`).
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
tumble(time_attr, interval [, timezone])
|
tumble(time_attr, interval [, timezone])
|
||||||
```
|
```
|
||||||
|
|
||||||
**Arguments**
|
**Arguments**
|
||||||
- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type.
|
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
|
||||||
- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type.
|
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
|
||||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
||||||
|
|
||||||
**Returned values**
|
**Returned values**
|
||||||
|
|
||||||
- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`.
|
- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).
|
||||||
|
|
||||||
**Example**
|
**Example**
|
||||||
|
|
||||||
Query:
|
Query:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT tumble(now(), toIntervalDay('1'))
|
SELECT tumble(now(), toIntervalDay('1'));
|
||||||
```
|
```
|
||||||
|
|
||||||
Result:
|
Result:
|
||||||
|
|
||||||
``` text
|
``` text
|
||||||
┌─tumble(now(), toIntervalDay('1'))─────────────┐
|
┌─tumble(now(), toIntervalDay('1'))─────────────┐
|
||||||
│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │
|
│ ('2024-07-04 00:00:00','2024-07-05 00:00:00') │
|
||||||
└───────────────────────────────────────────────┘
|
└───────────────────────────────────────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## tumbleStart
|
||||||
|
|
||||||
|
Returns the inclusive lower bound of the corresponding [tumbling window](#tumble).
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
tumbleStart(time_attr, interval [, timezone]);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Arguments**
|
||||||
|
|
||||||
|
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
|
||||||
|
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
|
||||||
|
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
||||||
|
|
||||||
|
The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
|
||||||
|
|
||||||
|
**Returned values**
|
||||||
|
|
||||||
|
- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
Query:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT tumbleStart(now(), toIntervalDay('1'));
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
```response
|
||||||
|
┌─tumbleStart(now(), toIntervalDay('1'))─┐
|
||||||
|
│ 2024-07-04 00:00:00 │
|
||||||
|
└────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## tumbleEnd
|
||||||
|
|
||||||
|
Returns the exclusive upper bound of the corresponding [tumbling window](#tumble).
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
tumbleEnd(time_attr, interval [, timezone]);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Arguments**
|
||||||
|
|
||||||
|
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
|
||||||
|
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
|
||||||
|
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
||||||
|
|
||||||
|
The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
|
||||||
|
|
||||||
|
**Returned values**
|
||||||
|
|
||||||
|
- The exclusive upper bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
Query:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT tumbleEnd(now(), toIntervalDay('1'));
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
```response
|
||||||
|
┌─tumbleEnd(now(), toIntervalDay('1'))─┐
|
||||||
|
│ 2024-07-05 00:00:00 │
|
||||||
|
└──────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
## hop
|
## hop
|
||||||
|
|
||||||
A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows.
|
A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows.
|
||||||
@ -51,65 +129,118 @@ hop(time_attr, hop_interval, window_interval [, timezone])
|
|||||||
|
|
||||||
**Arguments**
|
**Arguments**
|
||||||
|
|
||||||
- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type.
|
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
|
||||||
- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number.
|
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
|
||||||
- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number.
|
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
|
||||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
||||||
|
|
||||||
**Returned values**
|
**Returned values**
|
||||||
|
|
||||||
- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`.
|
- The inclusive lower and exclusive upper bound of the corresponding hopping window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).
|
||||||
|
|
||||||
|
:::note
|
||||||
|
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
|
||||||
|
:::
|
||||||
|
|
||||||
**Example**
|
**Example**
|
||||||
|
|
||||||
Query:
|
Query:
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND)
|
SELECT hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
|
||||||
```
|
```
|
||||||
|
|
||||||
Result:
|
Result:
|
||||||
|
|
||||||
``` text
|
``` text
|
||||||
┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐
|
┌─hop(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
|
||||||
│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │
|
│ ('2024-07-03 00:00:00','2024-07-05 00:00:00') │
|
||||||
└───────────────────────────────────────────────────────────┘
|
└────────────────────────────────────────────────────┘
|
||||||
```
|
|
||||||
|
|
||||||
## tumbleStart
|
|
||||||
|
|
||||||
Returns the inclusive lower bound of the corresponding tumbling window.
|
|
||||||
|
|
||||||
``` sql
|
|
||||||
tumbleStart(bounds_tuple);
|
|
||||||
tumbleStart(time_attr, interval [, timezone]);
|
|
||||||
```
|
|
||||||
|
|
||||||
## tumbleEnd
|
|
||||||
|
|
||||||
Returns the exclusive upper bound of the corresponding tumbling window.
|
|
||||||
|
|
||||||
``` sql
|
|
||||||
tumbleEnd(bounds_tuple);
|
|
||||||
tumbleEnd(time_attr, interval [, timezone]);
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## hopStart
|
## hopStart
|
||||||
|
|
||||||
Returns the inclusive lower bound of the corresponding hopping window.
|
Returns the inclusive lower bound of the corresponding [hopping window](#hop).
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
hopStart(bounds_tuple);
|
|
||||||
hopStart(time_attr, hop_interval, window_interval [, timezone]);
|
hopStart(time_attr, hop_interval, window_interval [, timezone]);
|
||||||
```
|
```
|
||||||
|
**Arguments**
|
||||||
|
|
||||||
|
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
|
||||||
|
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
|
||||||
|
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
|
||||||
|
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
||||||
|
|
||||||
|
The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
|
||||||
|
|
||||||
|
**Returned values**
|
||||||
|
|
||||||
|
- The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).
|
||||||
|
|
||||||
|
:::note
|
||||||
|
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
|
||||||
|
:::
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
Query:
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
SELECT hopStart(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
``` text
|
||||||
|
┌─hopStart(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
|
||||||
|
│ 2024-07-03 00:00:00 │
|
||||||
|
└─────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
## hopEnd
|
## hopEnd
|
||||||
|
|
||||||
Returns the exclusive upper bound of the corresponding hopping window.
|
Returns the exclusive upper bound of the corresponding [hopping window](#hop).
|
||||||
|
|
||||||
|
**Syntax**
|
||||||
|
|
||||||
``` sql
|
``` sql
|
||||||
hopEnd(bounds_tuple);
|
|
||||||
hopEnd(time_attr, hop_interval, window_interval [, timezone]);
|
hopEnd(time_attr, hop_interval, window_interval [, timezone]);
|
||||||
|
```
|
||||||
|
**Arguments**
|
||||||
|
|
||||||
|
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
|
||||||
|
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
|
||||||
|
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
|
||||||
|
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
||||||
|
|
||||||
|
The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
|
||||||
|
|
||||||
|
**Returned values**
|
||||||
|
|
||||||
|
- The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).
|
||||||
|
|
||||||
|
:::note
|
||||||
|
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
|
||||||
|
:::
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
Query:
|
||||||
|
|
||||||
|
``` sql
|
||||||
|
SELECT hopEnd(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
|
||||||
|
```
|
||||||
|
|
||||||
|
Result:
|
||||||
|
|
||||||
|
``` text
|
||||||
|
┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
|
||||||
|
│ 2024-07-05 00:00:00 │
|
||||||
|
└───────────────────────────────────────────────────────┘
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Related content
|
## Related content
|
||||||
|
@ -23,6 +23,7 @@ ClickHouse supports the standard grammar for defining windows and window functio
|
|||||||
| `GROUPS` frame | ❌ |
|
| `GROUPS` frame | ❌ |
|
||||||
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
|
| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) |
|
||||||
| `rank()`, `dense_rank()`, `row_number()` | ✅ |
|
| `rank()`, `dense_rank()`, `row_number()` | ✅ |
|
||||||
|
| `percent_rank()` | ✅ Efficiently computes the relative standing of a value within a partition in a dataset. This function effectively replaces the more verbose and computationally intensive manual SQL calculation expressed as `ifNull((rank() OVER(PARTITION BY x ORDER BY y) - 1) / nullif(count(1) OVER(PARTITION BY x) - 1, 0), 0)`|
|
||||||
| `lag/lead(value, offset)` | ❌ <br/> You can use one of the following workarounds:<br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
|
| `lag/lead(value, offset)` | ❌ <br/> You can use one of the following workarounds:<br/> 1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` <br/> 2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` |
|
||||||
| ntile(buckets) | ✅ <br/> Specify window like, (partition by x order by y rows between unbounded preceding and unbounded following). |
|
| ntile(buckets) | ✅ <br/> Specify window like, (partition by x order by y rows between unbounded preceding and unbounded following). |
|
||||||
|
|
||||||
|
@ -626,6 +626,28 @@ static void initializeAzureSDKLogger(
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(SANITIZER)
|
||||||
|
/// Lists the sanitizers this binary was compiled with, one human-readable
/// name per *_SANITIZER macro that is defined at build time.
static std::vector<String> getSanitizerNames()
{
    /// Every entry is compiled in only when its macro is defined;
    /// when none of them is defined the result is an empty list.
    return {
#if defined(ADDRESS_SANITIZER)
        "address",
#endif
#if defined(THREAD_SANITIZER)
        "thread",
#endif
#if defined(MEMORY_SANITIZER)
        "memory",
#endif
#if defined(UNDEFINED_BEHAVIOR_SANITIZER)
        "undefined behavior",
#endif
    };
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int Server::main(const std::vector<std::string> & /*args*/)
|
int Server::main(const std::vector<std::string> & /*args*/)
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -716,7 +738,17 @@ try
|
|||||||
global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
|
global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
|
||||||
|
|
||||||
#if defined(SANITIZER)
|
#if defined(SANITIZER)
|
||||||
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
|
auto sanitizers = getSanitizerNames();
|
||||||
|
|
||||||
|
String log_message;
|
||||||
|
if (sanitizers.empty())
|
||||||
|
log_message = "sanitizer";
|
||||||
|
else if (sanitizers.size() == 1)
|
||||||
|
log_message = fmt::format("{} sanitizer", sanitizers.front());
|
||||||
|
else
|
||||||
|
log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", "));
|
||||||
|
|
||||||
|
global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(SANITIZE_COVERAGE) || WITH_COVERAGE
|
#if defined(SANITIZE_COVERAGE) || WITH_COVERAGE
|
||||||
|
@ -1093,10 +1093,4 @@ void ColumnObject::finalize()
|
|||||||
checkObjectHasNoAmbiguosPaths(getKeys());
|
checkObjectHasNoAmbiguosPaths(getKeys());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Feeds every part of every subcolumn into `hash`, visiting subcolumns
/// and their parts in container iteration order.
void ColumnObject::updateHashFast(SipHash & hash) const
{
    for (const auto & subcolumn_entry : subcolumns)
    {
        auto & parts = subcolumn_entry->data.data;
        for (auto & column_part : parts)
            column_part->updateHashFast(hash);
    }
}
|
|
||||||
}
|
}
|
||||||
|
@ -242,7 +242,7 @@ public:
|
|||||||
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
|
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
|
||||||
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
|
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
|
||||||
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
|
void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); }
|
||||||
void updateHashFast(SipHash & hash) const override;
|
void updateHashFast(SipHash &) const override { throwMustBeConcrete(); }
|
||||||
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
|
void expand(const Filter &, bool) override { throwMustBeConcrete(); }
|
||||||
bool hasEqualValues() const override { throwMustBeConcrete(); }
|
bool hasEqualValues() const override { throwMustBeConcrete(); }
|
||||||
size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); }
|
size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); }
|
||||||
|
@ -711,7 +711,13 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c
|
|||||||
ColumnPtr ColumnTuple::compress() const
|
ColumnPtr ColumnTuple::compress() const
|
||||||
{
|
{
|
||||||
if (columns.empty())
|
if (columns.empty())
|
||||||
return Ptr();
|
{
|
||||||
|
return ColumnCompressed::create(size(), 0,
|
||||||
|
[n = column_length]
|
||||||
|
{
|
||||||
|
return ColumnTuple::create(n);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
size_t byte_size = 0;
|
size_t byte_size = 0;
|
||||||
Columns compressed;
|
Columns compressed;
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2)
|
static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
|
||||||
{
|
{
|
||||||
if ((end - pos) & 1)
|
if ((end - pos) & 1)
|
||||||
{
|
{
|
||||||
@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o
|
|||||||
++out;
|
++out;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void inline binStringDecode(const char * pos, const char * end, char *& out)
|
static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
|
||||||
{
|
{
|
||||||
if (pos == end)
|
if (pos == end)
|
||||||
{
|
{
|
||||||
@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o
|
|||||||
++out;
|
++out;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert((end - pos) % 8 == 0);
|
chassert((end - pos) % word_size == 0);
|
||||||
|
|
||||||
while (end - pos != 0)
|
while (end - pos != 0)
|
||||||
{
|
{
|
||||||
|
@ -1,184 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <base/defines.h>
|
|
||||||
|
|
||||||
#include <Common/Exception.h>
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <memory>
|
|
||||||
#include <typeindex>
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
|
|
||||||
namespace ErrorCodes
|
|
||||||
{
|
|
||||||
extern const int LOGICAL_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This is a collections of objects derived from ItemBase.
|
|
||||||
* Collection contains no more than one instance for each derived type.
|
|
||||||
* The derived type is used to access the instance.
|
|
||||||
*/
|
|
||||||
|
|
||||||
template<class ItemBase>
|
|
||||||
class CollectionOfDerivedItems
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
using Self = CollectionOfDerivedItems<ItemBase>;
|
|
||||||
using ItemPtr = std::shared_ptr<ItemBase>;
|
|
||||||
|
|
||||||
private:
|
|
||||||
struct Rec
|
|
||||||
{
|
|
||||||
std::type_index type_idx;
|
|
||||||
ItemPtr ptr;
|
|
||||||
|
|
||||||
bool operator<(const Rec & other) const
|
|
||||||
{
|
|
||||||
return type_idx < other.type_idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator<(const std::type_index & value) const
|
|
||||||
{
|
|
||||||
return type_idx < value;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator==(const Rec & other) const
|
|
||||||
{
|
|
||||||
return type_idx == other.type_idx;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
using Records = std::vector<Rec>;
|
|
||||||
|
|
||||||
public:
|
|
||||||
void swap(Self & other) noexcept
|
|
||||||
{
|
|
||||||
records.swap(other.records);
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear()
|
|
||||||
{
|
|
||||||
records.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool empty() const
|
|
||||||
{
|
|
||||||
return records.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t size() const
|
|
||||||
{
|
|
||||||
return records.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
Self clone() const
|
|
||||||
{
|
|
||||||
Self result;
|
|
||||||
result.records.reserve(records.size());
|
|
||||||
for (const auto & rec : records)
|
|
||||||
result.records.emplace_back(rec.type_idx, rec.ptr->clone());
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void append(Self && other)
|
|
||||||
{
|
|
||||||
auto middle_idx = records.size();
|
|
||||||
std::move(other.records.begin(), other.records.end(), std::back_inserter(records));
|
|
||||||
std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end());
|
|
||||||
chassert(isUniqTypes());
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
void add(std::shared_ptr<T> info)
|
|
||||||
{
|
|
||||||
static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
|
|
||||||
return addImpl(std::type_index(typeid(T)), std::move(info));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
std::shared_ptr<T> get() const
|
|
||||||
{
|
|
||||||
static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
|
|
||||||
auto it = getImpl(std::type_index(typeid(T)));
|
|
||||||
if (it == records.cend())
|
|
||||||
return nullptr;
|
|
||||||
auto cast = std::dynamic_pointer_cast<T>(it->ptr);
|
|
||||||
chassert(cast);
|
|
||||||
return cast;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
std::shared_ptr<T> extract()
|
|
||||||
{
|
|
||||||
static_assert(std::is_base_of_v<ItemBase, T>, "Template parameter must inherit items base class");
|
|
||||||
auto it = getImpl(std::type_index(typeid(T)));
|
|
||||||
if (it == records.cend())
|
|
||||||
return nullptr;
|
|
||||||
auto cast = std::dynamic_pointer_cast<T>(it->ptr);
|
|
||||||
chassert(cast);
|
|
||||||
|
|
||||||
records.erase(it);
|
|
||||||
return cast;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string debug() const
|
|
||||||
{
|
|
||||||
std::string result;
|
|
||||||
|
|
||||||
for (auto & rec : records)
|
|
||||||
{
|
|
||||||
result.append(rec.type_idx.name());
|
|
||||||
result.append(" ");
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
bool isUniqTypes() const
|
|
||||||
{
|
|
||||||
auto uniq_it = std::adjacent_find(records.begin(), records.end());
|
|
||||||
|
|
||||||
return uniq_it == records.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
void addImpl(std::type_index type_idx, ItemPtr item)
|
|
||||||
{
|
|
||||||
auto it = std::lower_bound(records.begin(), records.end(), type_idx);
|
|
||||||
|
|
||||||
if (it == records.end())
|
|
||||||
{
|
|
||||||
records.emplace_back(type_idx, item);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (it->type_idx == type_idx)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name());
|
|
||||||
|
|
||||||
|
|
||||||
records.emplace(it, type_idx, item);
|
|
||||||
|
|
||||||
chassert(isUniqTypes());
|
|
||||||
}
|
|
||||||
|
|
||||||
Records::const_iterator getImpl(std::type_index type_idx) const
|
|
||||||
{
|
|
||||||
auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx);
|
|
||||||
|
|
||||||
if (it == records.cend())
|
|
||||||
return records.cend();
|
|
||||||
|
|
||||||
if (it->type_idx != type_idx)
|
|
||||||
return records.cend();
|
|
||||||
|
|
||||||
return it;
|
|
||||||
}
|
|
||||||
|
|
||||||
Records records;
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
@ -36,7 +36,7 @@ class IColumn;
|
|||||||
M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\
|
M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\
|
||||||
M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \
|
M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \
|
||||||
M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \
|
M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \
|
||||||
M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \
|
M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \
|
||||||
M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \
|
M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \
|
||||||
M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
|
M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \
|
||||||
M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
|
M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \
|
||||||
@ -634,8 +634,9 @@ class IColumn;
|
|||||||
M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \
|
M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \
|
||||||
M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
|
M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
|
||||||
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \
|
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \
|
||||||
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \
|
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
|
||||||
M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \
|
M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \
|
||||||
|
M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \
|
||||||
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
|
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
|
||||||
M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
|
M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
|
||||||
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
|
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
|
||||||
@ -953,7 +954,6 @@ class IColumn;
|
|||||||
|
|
||||||
#define OBSOLETE_SETTINGS(M, ALIAS) \
|
#define OBSOLETE_SETTINGS(M, ALIAS) \
|
||||||
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
|
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
|
||||||
MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \
|
|
||||||
MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \
|
MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \
|
||||||
MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \
|
MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \
|
||||||
MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \
|
MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \
|
||||||
|
@ -2,9 +2,11 @@
|
|||||||
#include <DataTypes/Serializations/SerializationDynamic.h>
|
#include <DataTypes/Serializations/SerializationDynamic.h>
|
||||||
#include <DataTypes/Serializations/SerializationDynamicElement.h>
|
#include <DataTypes/Serializations/SerializationDynamicElement.h>
|
||||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||||
|
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
#include <DataTypes/NestedUtils.h>
|
#include <DataTypes/NestedUtils.h>
|
||||||
#include <DataTypes/DataTypeNullable.h>
|
#include <DataTypes/DataTypeNullable.h>
|
||||||
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
#include <Columns/ColumnDynamic.h>
|
#include <Columns/ColumnDynamic.h>
|
||||||
#include <Columns/ColumnVariant.h>
|
#include <Columns/ColumnVariant.h>
|
||||||
#include <Core/Field.h>
|
#include <Core/Field.h>
|
||||||
@ -110,28 +112,58 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnDa
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Extract nested subcolumn of requested dynamic subcolumn if needed.
|
/// Extract nested subcolumn of requested dynamic subcolumn if needed.
|
||||||
if (!subcolumn_nested_name.empty())
|
/// If requested subcolumn is null map, it's processed separately as there is no Nullable type yet.
|
||||||
|
bool is_null_map_subcolumn = subcolumn_nested_name == "null";
|
||||||
|
if (is_null_map_subcolumn)
|
||||||
|
{
|
||||||
|
res->type = std::make_shared<DataTypeUInt8>();
|
||||||
|
}
|
||||||
|
else if (!subcolumn_nested_name.empty())
|
||||||
{
|
{
|
||||||
res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null);
|
res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null);
|
||||||
if (!res)
|
if (!res)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName());
|
res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn);
|
||||||
res->type = makeNullableOrLowCardinalityNullableSafe(res->type);
|
/// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()).
|
||||||
|
bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality();
|
||||||
|
if (!is_null_map_subcolumn && make_subcolumn_nullable)
|
||||||
|
res->type = makeNullableOrLowCardinalityNullableSafe(res->type);
|
||||||
|
|
||||||
if (data.column)
|
if (data.column)
|
||||||
{
|
{
|
||||||
if (discriminator)
|
if (discriminator)
|
||||||
{
|
{
|
||||||
/// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to
|
/// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to
|
||||||
/// create full subcolumn from variant according to discriminators.
|
/// create full subcolumn from variant according to discriminators.
|
||||||
const auto & variant_column = assert_cast<const ColumnDynamic &>(*data.column).getVariantColumn();
|
const auto & variant_column = assert_cast<const ColumnDynamic &>(*data.column).getVariantColumn();
|
||||||
auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator));
|
std::unique_ptr<ISerialization::ISubcolumnCreator> creator;
|
||||||
res->column = creator.create(res->column);
|
if (is_null_map_subcolumn)
|
||||||
|
creator = std::make_unique<SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator>(
|
||||||
|
variant_column.getLocalDiscriminatorsPtr(),
|
||||||
|
"",
|
||||||
|
*discriminator,
|
||||||
|
variant_column.localDiscriminatorByGlobal(*discriminator));
|
||||||
|
else
|
||||||
|
creator = std::make_unique<SerializationVariantElement::VariantSubcolumnCreator>(
|
||||||
|
variant_column.getLocalDiscriminatorsPtr(),
|
||||||
|
"",
|
||||||
|
*discriminator,
|
||||||
|
variant_column.localDiscriminatorByGlobal(*discriminator),
|
||||||
|
make_subcolumn_nullable);
|
||||||
|
res->column = creator->create(res->column);
|
||||||
|
}
|
||||||
|
/// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values.
|
||||||
|
else if (is_null_map_subcolumn)
|
||||||
|
{
|
||||||
|
/// Fill null map with 1 when there is no such Dynamic subcolumn.
|
||||||
|
auto column = ColumnUInt8::create();
|
||||||
|
assert_cast<ColumnUInt8 &>(*column).getData().resize_fill(data.column->size(), 1);
|
||||||
|
res->column = std::move(column);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values.
|
|
||||||
auto column = res->type->createColumn();
|
auto column = res->type->createColumn();
|
||||||
column->insertManyDefaults(data.column->size());
|
column->insertManyDefaults(data.column->size());
|
||||||
res->column = std::move(column);
|
res->column = std::move(column);
|
||||||
|
@ -173,7 +173,7 @@ bool IDataType::hasDynamicSubcolumns() const
|
|||||||
auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
|
auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
|
||||||
auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data)
|
auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data)
|
||||||
{
|
{
|
||||||
has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData();
|
has_dynamic_subcolumns |= subcolumn_data.type && subcolumn_data.type->hasDynamicSubcolumnsData();
|
||||||
};
|
};
|
||||||
forEachSubcolumn(callback, data);
|
forEachSubcolumn(callback, data);
|
||||||
return has_dynamic_subcolumns;
|
return has_dynamic_subcolumns;
|
||||||
|
@ -64,6 +64,9 @@ String ISerialization::Substream::toString() const
|
|||||||
if (type == VariantElement)
|
if (type == VariantElement)
|
||||||
return fmt::format("VariantElement({})", variant_element_name);
|
return fmt::format("VariantElement({})", variant_element_name);
|
||||||
|
|
||||||
|
if (type == VariantElementNullMap)
|
||||||
|
return fmt::format("VariantElementNullMap({}.null)", variant_element_name);
|
||||||
|
|
||||||
return String(magic_enum::enum_name(type));
|
return String(magic_enum::enum_name(type));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -195,6 +198,8 @@ String getNameForSubstreamPath(
|
|||||||
stream_name += ".variant_offsets";
|
stream_name += ".variant_offsets";
|
||||||
else if (it->type == Substream::VariantElement)
|
else if (it->type == Substream::VariantElement)
|
||||||
stream_name += "." + it->variant_element_name;
|
stream_name += "." + it->variant_element_name;
|
||||||
|
else if (it->type == Substream::VariantElementNullMap)
|
||||||
|
stream_name += "." + it->variant_element_name + ".null";
|
||||||
else if (it->type == SubstreamType::DynamicStructure)
|
else if (it->type == SubstreamType::DynamicStructure)
|
||||||
stream_name += ".dynamic_structure";
|
stream_name += ".dynamic_structure";
|
||||||
}
|
}
|
||||||
@ -395,7 +400,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref
|
|||||||
return path[last_elem].type == Substream::NullMap
|
return path[last_elem].type == Substream::NullMap
|
||||||
|| path[last_elem].type == Substream::TupleElement
|
|| path[last_elem].type == Substream::TupleElement
|
||||||
|| path[last_elem].type == Substream::ArraySizes
|
|| path[last_elem].type == Substream::ArraySizes
|
||||||
|| path[last_elem].type == Substream::VariantElement;
|
|| path[last_elem].type == Substream::VariantElement
|
||||||
|
|| path[last_elem].type == Substream::VariantElementNullMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)
|
ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)
|
||||||
|
@ -184,6 +184,7 @@ public:
|
|||||||
VariantOffsets,
|
VariantOffsets,
|
||||||
VariantElements,
|
VariantElements,
|
||||||
VariantElement,
|
VariantElement,
|
||||||
|
VariantElementNullMap,
|
||||||
|
|
||||||
DynamicData,
|
DynamicData,
|
||||||
DynamicStructure,
|
DynamicStructure,
|
||||||
@ -256,6 +257,8 @@ public:
|
|||||||
|
|
||||||
bool position_independent_encoding = true;
|
bool position_independent_encoding = true;
|
||||||
|
|
||||||
|
bool use_compact_variant_discriminators_serialization = false;
|
||||||
|
|
||||||
enum class DynamicStatisticsMode
|
enum class DynamicStatisticsMode
|
||||||
{
|
{
|
||||||
NONE, /// Don't write statistics.
|
NONE, /// Don't write statistics.
|
||||||
@ -434,6 +437,9 @@ protected:
|
|||||||
template <typename State, typename StatePtr>
|
template <typename State, typename StatePtr>
|
||||||
State * checkAndGetState(const StatePtr & state) const;
|
State * checkAndGetState(const StatePtr & state) const;
|
||||||
|
|
||||||
|
template <typename State, typename StatePtr>
|
||||||
|
static State * checkAndGetState(const StatePtr & state, const ISerialization * serialization);
|
||||||
|
|
||||||
[[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const;
|
[[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -444,10 +450,16 @@ using SubstreamType = ISerialization::Substream::Type;
|
|||||||
|
|
||||||
template <typename State, typename StatePtr>
|
template <typename State, typename StatePtr>
|
||||||
State * ISerialization::checkAndGetState(const StatePtr & state) const
|
State * ISerialization::checkAndGetState(const StatePtr & state) const
|
||||||
|
{
|
||||||
|
return checkAndGetState<State, StatePtr>(state, this);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename State, typename StatePtr>
|
||||||
|
State * ISerialization::checkAndGetState(const StatePtr & state, const ISerialization * serialization)
|
||||||
{
|
{
|
||||||
if (!state)
|
if (!state)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||||
"Got empty state for {}", demangle(typeid(*this).name()));
|
"Got empty state for {}", demangle(typeid(*serialization).name()));
|
||||||
|
|
||||||
auto * state_concrete = typeid_cast<State *>(state.get());
|
auto * state_concrete = typeid_cast<State *>(state.get());
|
||||||
if (!state_concrete)
|
if (!state_concrete)
|
||||||
@ -455,7 +467,7 @@ State * ISerialization::checkAndGetState(const StatePtr & state) const
|
|||||||
auto & state_ref = *state;
|
auto & state_ref = *state;
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||||
"Invalid State for {}. Expected: {}, got {}",
|
"Invalid State for {}. Expected: {}, got {}",
|
||||||
demangle(typeid(*this).name()),
|
demangle(typeid(*serialization).name()),
|
||||||
demangle(typeid(State).name()),
|
demangle(typeid(State).name()),
|
||||||
demangle(typeid(state_ref).name()));
|
demangle(typeid(state_ref).name()));
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <DataTypes/Serializations/SerializationDynamicElement.h>
|
#include <DataTypes/Serializations/SerializationDynamicElement.h>
|
||||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||||
|
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||||
#include <DataTypes/Serializations/SerializationDynamic.h>
|
#include <DataTypes/Serializations/SerializationDynamic.h>
|
||||||
#include <DataTypes/DataTypeVariant.h>
|
#include <DataTypes/DataTypeVariant.h>
|
||||||
#include <DataTypes/DataTypeFactory.h>
|
#include <DataTypes/DataTypeFactory.h>
|
||||||
@ -77,7 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix(
|
|||||||
if (auto global_discr = assert_cast<const DataTypeVariant &>(*variant_type).tryGetVariantDiscriminator(dynamic_element_name))
|
if (auto global_discr = assert_cast<const DataTypeVariant &>(*variant_type).tryGetVariantDiscriminator(dynamic_element_name))
|
||||||
{
|
{
|
||||||
settings.path.push_back(Substream::DynamicData);
|
settings.path.push_back(Substream::DynamicData);
|
||||||
dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElement>(nested_serialization, dynamic_element_name, *global_discr);
|
if (is_null_map_subcolumn)
|
||||||
|
dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElementNullMap>(dynamic_element_name, *global_discr);
|
||||||
|
else
|
||||||
|
dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElement>(nested_serialization, dynamic_element_name, *global_discr);
|
||||||
dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache);
|
dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache);
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
@ -98,7 +102,16 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
SubstreamsCache * cache) const
|
SubstreamsCache * cache) const
|
||||||
{
|
{
|
||||||
if (!state)
|
if (!state)
|
||||||
|
{
|
||||||
|
if (is_null_map_subcolumn)
|
||||||
|
{
|
||||||
|
auto mutable_column = result_column->assumeMutable();
|
||||||
|
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
|
||||||
|
data.resize_fill(data.size() + limit, 1);
|
||||||
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
auto * dynamic_element_state = checkAndGetState<DeserializeBinaryBulkStateDynamicElement>(state);
|
auto * dynamic_element_state = checkAndGetState<DeserializeBinaryBulkStateDynamicElement>(state);
|
||||||
|
|
||||||
@ -108,6 +121,12 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
|
dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
|
else if (is_null_map_subcolumn)
|
||||||
|
{
|
||||||
|
auto mutable_column = result_column->assumeMutable();
|
||||||
|
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
|
||||||
|
data.resize_fill(data.size() + limit, 1);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto mutable_column = result_column->assumeMutable();
|
auto mutable_column = result_column->assumeMutable();
|
||||||
|
@ -13,11 +13,11 @@ private:
|
|||||||
/// To be able to deserialize Dynamic element as a subcolumn
|
/// To be able to deserialize Dynamic element as a subcolumn
|
||||||
/// we need its type name and global discriminator.
|
/// we need its type name and global discriminator.
|
||||||
String dynamic_element_name;
|
String dynamic_element_name;
|
||||||
|
bool is_null_map_subcolumn;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_)
|
SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false)
|
||||||
: SerializationWrapper(nested_)
|
: SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_)
|
||||||
, dynamic_element_name(dynamic_element_name_)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <DataTypes/Serializations/SerializationVariant.h>
|
#include <DataTypes/Serializations/SerializationVariant.h>
|
||||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||||
|
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||||
#include <DataTypes/Serializations/SerializationNumber.h>
|
#include <DataTypes/Serializations/SerializationNumber.h>
|
||||||
#include <DataTypes/Serializations/SerializationNullable.h>
|
#include <DataTypes/Serializations/SerializationNullable.h>
|
||||||
#include <DataTypes/Serializations/SerializationNamed.h>
|
#include <DataTypes/Serializations/SerializationNamed.h>
|
||||||
@ -30,12 +31,18 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState
|
struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState
|
||||||
{
|
{
|
||||||
std::vector<ISerialization::SerializeBinaryBulkStatePtr> states;
|
explicit SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
SerializationVariant::DiscriminatorsSerializationMode discriminators_mode;
|
||||||
|
std::vector<ISerialization::SerializeBinaryBulkStatePtr> variant_states;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState
|
struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState
|
||||||
{
|
{
|
||||||
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> states;
|
ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
|
||||||
|
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> variant_states;
|
||||||
};
|
};
|
||||||
|
|
||||||
void SerializationVariant::enumerateStreams(
|
void SerializationVariant::enumerateStreams(
|
||||||
@ -65,13 +72,19 @@ void SerializationVariant::enumerateStreams(
|
|||||||
|
|
||||||
for (size_t i = 0; i < variants.size(); ++i)
|
for (size_t i = 0; i < variants.size(); ++i)
|
||||||
{
|
{
|
||||||
settings.path.back().creator = std::make_shared<SerializationVariantElement::VariantSubcolumnCreator>(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i);
|
DataTypePtr type = type_variant ? type_variant->getVariant(i) : nullptr;
|
||||||
|
settings.path.back().creator = std::make_shared<SerializationVariantElement::VariantSubcolumnCreator>(
|
||||||
|
local_discriminators,
|
||||||
|
variant_names[i],
|
||||||
|
i,
|
||||||
|
column_variant ? column_variant->localDiscriminatorByGlobal(i) : i,
|
||||||
|
!type || type->canBeInsideNullable() || type->lowCardinality());
|
||||||
|
|
||||||
auto variant_data = SubstreamData(variants[i])
|
auto variant_data = SubstreamData(variants[i])
|
||||||
.withType(type_variant ? type_variant->getVariant(i) : nullptr)
|
.withType(type)
|
||||||
.withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr)
|
.withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr)
|
||||||
.withSerializationInfo(data.serialization_info)
|
.withSerializationInfo(data.serialization_info)
|
||||||
.withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr);
|
.withDeserializeState(variant_deserialize_state ? variant_deserialize_state->variant_states[i] : nullptr);
|
||||||
|
|
||||||
addVariantElementToPath(settings.path, i);
|
addVariantElementToPath(settings.path, i);
|
||||||
settings.path.back().data = variant_data;
|
settings.path.back().data = variant_data;
|
||||||
@ -79,6 +92,24 @@ void SerializationVariant::enumerateStreams(
|
|||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Variant subcolumns like variant.Type have type Nullable(Type), so we want to support reading null map subcolumn from it: variant.Type.null.
|
||||||
|
/// Nullable column is created during deserialization of a variant subcolumn according to the discriminators, so we don't have actual Nullable
|
||||||
|
/// serialization with null map subcolumn. To be able to read null map subcolumn from the variant subcolumn we use special serialization
|
||||||
|
/// SerializationVariantElementNullMap.
|
||||||
|
auto null_map_data = SubstreamData(std::make_shared<SerializationNumber<UInt8>>())
|
||||||
|
.withType(type_variant ? std::make_shared<DataTypeUInt8>() : nullptr)
|
||||||
|
.withColumn(column_variant ? ColumnUInt8::create() : nullptr);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < variants.size(); ++i)
|
||||||
|
{
|
||||||
|
settings.path.back().creator = std::make_shared<SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator>(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i);
|
||||||
|
settings.path.push_back(Substream::VariantElementNullMap);
|
||||||
|
settings.path.back().variant_element_name = variant_names[i];
|
||||||
|
settings.path.back().data = null_map_data;
|
||||||
|
callback(settings.path);
|
||||||
|
settings.path.pop_back();
|
||||||
|
}
|
||||||
|
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,17 +118,26 @@ void SerializationVariant::serializeBinaryBulkStatePrefix(
|
|||||||
SerializeBinaryBulkSettings & settings,
|
SerializeBinaryBulkSettings & settings,
|
||||||
SerializeBinaryBulkStatePtr & state) const
|
SerializeBinaryBulkStatePtr & state) const
|
||||||
{
|
{
|
||||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
settings.path.push_back(Substream::VariantDiscriminators);
|
||||||
|
auto * discriminators_stream = settings.getter(settings.path);
|
||||||
|
settings.path.pop_back();
|
||||||
|
|
||||||
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>();
|
if (!discriminators_stream)
|
||||||
variant_state->states.resize(variants.size());
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkStatePrefix");
|
||||||
|
|
||||||
|
UInt64 mode = settings.use_compact_variant_discriminators_serialization ? DiscriminatorsSerializationMode::COMPACT : DiscriminatorsSerializationMode::BASIC;
|
||||||
|
writeBinaryLittleEndian(mode, *discriminators_stream);
|
||||||
|
|
||||||
|
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||||
|
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>(mode);
|
||||||
|
variant_state->variant_states.resize(variants.size());
|
||||||
|
|
||||||
settings.path.push_back(Substream::VariantElements);
|
settings.path.push_back(Substream::VariantElements);
|
||||||
|
|
||||||
for (size_t i = 0; i < variants.size(); ++i)
|
for (size_t i = 0; i < variants.size(); ++i)
|
||||||
{
|
{
|
||||||
addVariantElementToPath(settings.path, i);
|
addVariantElementToPath(settings.path, i);
|
||||||
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]);
|
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]);
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -116,7 +156,7 @@ void SerializationVariant::serializeBinaryBulkStateSuffix(
|
|||||||
for (size_t i = 0; i < variants.size(); ++i)
|
for (size_t i = 0; i < variants.size(); ++i)
|
||||||
{
|
{
|
||||||
addVariantElementToPath(settings.path, i);
|
addVariantElementToPath(settings.path, i);
|
||||||
variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->states[i]);
|
variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]);
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
@ -128,14 +168,19 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix(
|
|||||||
DeserializeBinaryBulkStatePtr & state,
|
DeserializeBinaryBulkStatePtr & state,
|
||||||
SubstreamsDeserializeStatesCache * cache) const
|
SubstreamsDeserializeStatesCache * cache) const
|
||||||
{
|
{
|
||||||
|
DeserializeBinaryBulkStatePtr discriminators_state = deserializeDiscriminatorsStatePrefix(settings, cache);
|
||||||
|
if (!discriminators_state)
|
||||||
|
return;
|
||||||
|
|
||||||
auto variant_state = std::make_shared<DeserializeBinaryBulkStateVariant>();
|
auto variant_state = std::make_shared<DeserializeBinaryBulkStateVariant>();
|
||||||
variant_state->states.resize(variants.size());
|
variant_state->discriminators_state = discriminators_state;
|
||||||
|
variant_state->variant_states.resize(variants.size());
|
||||||
|
|
||||||
settings.path.push_back(Substream::VariantElements);
|
settings.path.push_back(Substream::VariantElements);
|
||||||
for (size_t i = 0; i < variants.size(); ++i)
|
for (size_t i = 0; i < variants.size(); ++i)
|
||||||
{
|
{
|
||||||
addVariantElementToPath(settings.path, i);
|
addVariantElementToPath(settings.path, i);
|
||||||
variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache);
|
variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache);
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,6 +188,29 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix(
|
|||||||
state = std::move(variant_state);
|
state = std::move(variant_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix(
|
||||||
|
DeserializeBinaryBulkSettings & settings,
|
||||||
|
SubstreamsDeserializeStatesCache * cache)
|
||||||
|
{
|
||||||
|
settings.path.push_back(Substream::VariantDiscriminators);
|
||||||
|
|
||||||
|
DeserializeBinaryBulkStatePtr discriminators_state = nullptr;
|
||||||
|
if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path))
|
||||||
|
{
|
||||||
|
discriminators_state = cached_state;
|
||||||
|
}
|
||||||
|
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||||
|
{
|
||||||
|
UInt64 mode;
|
||||||
|
readBinaryLittleEndian(mode, *discriminators_stream);
|
||||||
|
discriminators_state = std::make_shared<DeserializeBinaryBulkStateVariantDiscriminators>(mode);
|
||||||
|
addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state);
|
||||||
|
}
|
||||||
|
|
||||||
|
settings.path.pop_back();
|
||||||
|
return discriminators_state;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(
|
void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(
|
||||||
const IColumn & column,
|
const IColumn & column,
|
||||||
@ -165,13 +233,71 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
|||||||
|
|
||||||
auto * variant_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state);
|
auto * variant_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state);
|
||||||
|
|
||||||
/// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate
|
/// Don't write anything if column is empty.
|
||||||
|
if (limit == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/// Write number of rows in this granule in compact mode.
|
||||||
|
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||||
|
writeVarUInt(UInt64(limit), *discriminators_stream);
|
||||||
|
|
||||||
|
/// If the column has only one non-empty variant and no NULLs, we don't need to
|
||||||
|
/// calculate limits for variants and use provided offset/limit.
|
||||||
|
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||||
|
{
|
||||||
|
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
|
||||||
|
|
||||||
|
/// In compact mode write the format of the granule and single non-empty discriminator.
|
||||||
|
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||||
|
{
|
||||||
|
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||||
|
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||||
|
}
|
||||||
|
/// For basic mode just serialize this discriminator limit times.
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < limit; ++i)
|
||||||
|
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
settings.path.push_back(Substream::VariantElements);
|
||||||
|
addVariantElementToPath(settings.path, non_empty_global_discr);
|
||||||
|
/// We can use the same offset/limit as for whole Variant column
|
||||||
|
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]);
|
||||||
|
variants_statistics[variant_names[non_empty_global_discr]] += limit;
|
||||||
|
settings.path.pop_back();
|
||||||
|
settings.path.pop_back();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/// If column has only NULLs, just serialize NULL discriminators.
|
||||||
|
else if (col.hasOnlyNulls())
|
||||||
|
{
|
||||||
|
/// In compact mode write single NULL_DISCRIMINATOR.
|
||||||
|
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||||
|
{
|
||||||
|
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||||
|
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
|
||||||
|
}
|
||||||
|
/// In basic mode write NULL_DISCRIMINATOR limit times.
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < limit; ++i)
|
||||||
|
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If offset = 0 and limit == col.size() we don't need to calculate
|
||||||
/// offsets and limits for variants and need to just serialize whole columns.
|
/// offsets and limits for variants and need to just serialize whole columns.
|
||||||
if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls())
|
if ((offset == 0 && limit == col.size()))
|
||||||
{
|
{
|
||||||
/// First, serialize discriminators.
|
/// First, serialize discriminators.
|
||||||
/// If we have only NULLs or local and global discriminators are the same, just serialize the column as is.
|
/// Here we are sure that column contains different discriminators, use plain granule format in compact mode.
|
||||||
if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder())
|
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||||
|
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream);
|
||||||
|
|
||||||
|
/// If local and global discriminators are the same, just serialize the column as is.
|
||||||
|
if (col.hasGlobalVariantsOrder())
|
||||||
{
|
{
|
||||||
SerializationNumber<ColumnVariant::Discriminator>().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit);
|
SerializationNumber<ColumnVariant::Discriminator>().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit);
|
||||||
}
|
}
|
||||||
@ -188,7 +314,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
|||||||
for (size_t i = 0; i != variants.size(); ++i)
|
for (size_t i = 0; i != variants.size(); ++i)
|
||||||
{
|
{
|
||||||
addVariantElementToPath(settings.path, i);
|
addVariantElementToPath(settings.path, i);
|
||||||
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]);
|
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]);
|
||||||
variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size();
|
variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size();
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
@ -196,36 +322,16 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant.
|
|
||||||
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
|
||||||
{
|
|
||||||
/// First, serialize discriminators.
|
|
||||||
/// We know that all discriminators are the same, so we just need to serialize this discriminator limit times.
|
|
||||||
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
|
|
||||||
for (size_t i = 0; i != limit; ++i)
|
|
||||||
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
|
||||||
|
|
||||||
/// Second, serialize non-empty variant (other variants are empty and we can skip their serialization).
|
|
||||||
settings.path.push_back(Substream::VariantElements);
|
|
||||||
addVariantElementToPath(settings.path, non_empty_global_discr);
|
|
||||||
/// We can use the same offset/limit as for whole Variant column
|
|
||||||
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]);
|
|
||||||
variants_statistics[variant_names[non_empty_global_discr]] += limit;
|
|
||||||
settings.path.pop_back();
|
|
||||||
settings.path.pop_back();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant.
|
/// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant.
|
||||||
const auto & local_discriminators = col.getLocalDiscriminators();
|
const auto & local_discriminators = col.getLocalDiscriminators();
|
||||||
const auto & offsets = col.getOffsets();
|
const auto & offsets = col.getOffsets();
|
||||||
std::vector<std::pair<size_t, size_t>> variant_offsets_and_limits(variants.size(), {0, 0});
|
std::vector<std::pair<size_t, size_t>> variant_offsets_and_limits(variants.size(), {0, 0});
|
||||||
size_t end = offset + limit;
|
size_t end = offset + limit;
|
||||||
|
size_t num_non_empty_variants_in_range = 0;
|
||||||
|
ColumnVariant::Discriminator last_non_empty_variant_discr = 0;
|
||||||
for (size_t i = offset; i < end; ++i)
|
for (size_t i = offset; i < end; ++i)
|
||||||
{
|
{
|
||||||
auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]);
|
auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]);
|
||||||
writeBinaryLittleEndian(global_discr, *discriminators_stream);
|
|
||||||
|
|
||||||
if (global_discr != ColumnVariant::NULL_DISCRIMINATOR)
|
if (global_discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||||
{
|
{
|
||||||
/// If we see this discriminator for the first time, update offset
|
/// If we see this discriminator for the first time, update offset
|
||||||
@ -233,9 +339,38 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
|||||||
variant_offsets_and_limits[global_discr].first = offsets[i];
|
variant_offsets_and_limits[global_discr].first = offsets[i];
|
||||||
/// Update limit for this discriminator.
|
/// Update limit for this discriminator.
|
||||||
++variant_offsets_and_limits[global_discr].second;
|
++variant_offsets_and_limits[global_discr].second;
|
||||||
|
++num_non_empty_variants_in_range;
|
||||||
|
last_non_empty_variant_discr = global_discr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// In basic mode just serialize discriminators as is row by row.
|
||||||
|
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC)
|
||||||
|
{
|
||||||
|
for (size_t i = offset; i < end; ++i)
|
||||||
|
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
|
||||||
|
}
|
||||||
|
/// In compact mode check if we have the same discriminator for all rows in this granule.
|
||||||
|
/// First, check if all values in granule are NULLs.
|
||||||
|
else if (num_non_empty_variants_in_range == 0)
|
||||||
|
{
|
||||||
|
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||||
|
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
|
||||||
|
}
|
||||||
|
/// Then, check if there is only 1 variant and no NULLs in this granule.
|
||||||
|
else if (num_non_empty_variants_in_range == 1 && variant_offsets_and_limits[last_non_empty_variant_discr].second == limit)
|
||||||
|
{
|
||||||
|
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||||
|
writeBinaryLittleEndian(last_non_empty_variant_discr, *discriminators_stream);
|
||||||
|
}
|
||||||
|
/// Otherwise there are different discriminators in this granule.
|
||||||
|
else
|
||||||
|
{
|
||||||
|
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream);
|
||||||
|
for (size_t i = offset; i < end; ++i)
|
||||||
|
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
|
||||||
|
}
|
||||||
|
|
||||||
/// Serialize variants in global order.
|
/// Serialize variants in global order.
|
||||||
settings.path.push_back(Substream::VariantElements);
|
settings.path.push_back(Substream::VariantElements);
|
||||||
for (size_t i = 0; i != variants.size(); ++i)
|
for (size_t i = 0; i != variants.size(); ++i)
|
||||||
@ -249,7 +384,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
|||||||
variant_offsets_and_limits[i].first,
|
variant_offsets_and_limits[i].first,
|
||||||
variant_offsets_and_limits[i].second,
|
variant_offsets_and_limits[i].second,
|
||||||
settings,
|
settings,
|
||||||
variant_state->states[i]);
|
variant_state->variant_states[i]);
|
||||||
variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second;
|
variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second;
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
@ -284,39 +419,68 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
|
|
||||||
/// First, deserialize discriminators.
|
/// First, deserialize discriminators.
|
||||||
settings.path.push_back(Substream::VariantDiscriminators);
|
settings.path.push_back(Substream::VariantDiscriminators);
|
||||||
|
|
||||||
|
DeserializeBinaryBulkStateVariant * variant_state = nullptr;
|
||||||
|
std::vector<size_t> variant_limits;
|
||||||
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
|
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
|
||||||
{
|
{
|
||||||
|
variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
|
||||||
col.getLocalDiscriminatorsPtr() = cached_discriminators;
|
col.getLocalDiscriminatorsPtr() = cached_discriminators;
|
||||||
}
|
}
|
||||||
else
|
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||||
{
|
{
|
||||||
auto * discriminators_stream = settings.getter(settings.path);
|
variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
|
||||||
if (!discriminators_stream)
|
auto * discriminators_state = checkAndGetState<DeserializeBinaryBulkStateVariantDiscriminators>(variant_state->discriminators_state);
|
||||||
return;
|
|
||||||
|
/// Deserialize discriminators according to serialization mode.
|
||||||
|
if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC)
|
||||||
|
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
|
||||||
|
else
|
||||||
|
variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, *discriminators_state);
|
||||||
|
|
||||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
|
|
||||||
addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr());
|
addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr());
|
||||||
}
|
}
|
||||||
|
/// It may happen that there is no such stream, in this case just do nothing.
|
||||||
|
else
|
||||||
|
{
|
||||||
|
settings.path.pop_back();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
|
|
||||||
/// Second, calculate limits for each variant by iterating through new discriminators.
|
/// Second, calculate limits for each variant by iterating through new discriminators
|
||||||
std::vector<size_t> variant_limits(variants.size(), 0);
|
/// if we didn't do it during discriminators deserialization.
|
||||||
auto & discriminators_data = col.getLocalDiscriminators();
|
if (variant_limits.empty())
|
||||||
size_t discriminators_offset = discriminators_data.size() - limit;
|
|
||||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
|
||||||
{
|
{
|
||||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
variant_limits.resize(variants.size(), 0);
|
||||||
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
auto & discriminators_data = col.getLocalDiscriminators();
|
||||||
++variant_limits[discr];
|
|
||||||
|
/// We can actually read fewer than `limit` discriminators, and we cannot determine the actual number of read rows
|
||||||
|
/// by discriminators column as it could be taken from the substreams cache. And we need actual number of read
|
||||||
|
/// rows to fill offsets correctly later if they are not in the cache. We can determine if offsets column is in cache
|
||||||
|
/// or not by comparing it with discriminators column size (they should be the same when offsets are in cache).
|
||||||
|
/// If offsets are not in the cache, we can use its size to determine the actual number of read rows.
|
||||||
|
size_t num_new_discriminators = limit;
|
||||||
|
size_t offsets_size = col.getOffsetsPtr()->size();
|
||||||
|
if (discriminators_data.size() > offsets_size)
|
||||||
|
num_new_discriminators = discriminators_data.size() - offsets_size;
|
||||||
|
size_t discriminators_offset = discriminators_data.size() - num_new_discriminators;
|
||||||
|
|
||||||
|
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||||
|
{
|
||||||
|
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||||
|
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||||
|
++variant_limits[discr];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Now we can deserialize variants according to their limits.
|
/// Now we can deserialize variants according to their limits.
|
||||||
auto * variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
|
|
||||||
settings.path.push_back(Substream::VariantElements);
|
settings.path.push_back(Substream::VariantElements);
|
||||||
for (size_t i = 0; i != variants.size(); ++i)
|
for (size_t i = 0; i != variants.size(); ++i)
|
||||||
{
|
{
|
||||||
addVariantElementToPath(settings.path, i);
|
addVariantElementToPath(settings.path, i);
|
||||||
variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache);
|
variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->variant_states[i], cache);
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
@ -336,20 +500,49 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto & offsets = col.getOffsets();
|
|
||||||
offsets.reserve(offsets.size() + limit);
|
|
||||||
std::vector<size_t> variant_offsets;
|
std::vector<size_t> variant_offsets;
|
||||||
variant_offsets.reserve(variants.size());
|
variant_offsets.reserve(variants.size());
|
||||||
|
size_t num_non_empty_variants = 0;
|
||||||
|
ColumnVariant::Discriminator last_non_empty_discr = 0;
|
||||||
for (size_t i = 0; i != variants.size(); ++i)
|
for (size_t i = 0; i != variants.size(); ++i)
|
||||||
variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);
|
|
||||||
|
|
||||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
|
||||||
{
|
{
|
||||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
if (variant_limits[i])
|
||||||
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
|
{
|
||||||
offsets.emplace_back();
|
++num_non_empty_variants;
|
||||||
else
|
last_non_empty_discr = i;
|
||||||
offsets.push_back(variant_offsets[discr]++);
|
}
|
||||||
|
|
||||||
|
variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto & discriminators_data = col.getLocalDiscriminators();
|
||||||
|
auto & offsets = col.getOffsets();
|
||||||
|
size_t num_new_offsets = discriminators_data.size() - offsets.size();
|
||||||
|
offsets.reserve(offsets.size() + num_new_offsets);
|
||||||
|
/// If only NULLs were read, fill offsets with 0.
|
||||||
|
if (num_non_empty_variants == 0)
|
||||||
|
{
|
||||||
|
offsets.resize_fill(discriminators_data.size(), 0);
|
||||||
|
}
|
||||||
|
/// If there is only 1 variant and no NULLs were read, fill offsets with sequential offsets of this variant.
|
||||||
|
else if (num_non_empty_variants == 1 && variant_limits[last_non_empty_discr] == num_new_offsets)
|
||||||
|
{
|
||||||
|
size_t first_offset = col.getVariantByLocalDiscriminator(last_non_empty_discr).size() - num_new_offsets;
|
||||||
|
for (size_t i = 0; i != num_new_offsets; ++i)
|
||||||
|
offsets.push_back(first_offset + i);
|
||||||
|
}
|
||||||
|
/// Otherwise iterate through discriminators and fill offsets accordingly.
|
||||||
|
else
|
||||||
|
{
|
||||||
|
size_t start = offsets.size();
|
||||||
|
for (size_t i = start; i != discriminators_data.size(); ++i)
|
||||||
|
{
|
||||||
|
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||||
|
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||||
|
offsets.emplace_back();
|
||||||
|
else
|
||||||
|
offsets.push_back(variant_offsets[discr]++);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr());
|
addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr());
|
||||||
@ -357,6 +550,72 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads up to `limit` discriminators stored in compact mode from `stream` and appends them
/// to `discriminators_column` (which is cast to ColumnVariant::ColumnDiscriminators).
///
/// The stream is consumed granule by granule: a granule header is read via
/// readDiscriminatorsGranuleStart() whenever `state.remaining_rows_in_granule` is 0, and
/// `state` keeps the position inside the current granule between calls.
/// If `continuous_reading` is false, the remaining-rows counter is reset so that the next
/// read starts with a granule header.
///
/// Returns a vector of size variants.size() where element `d` is the number of appended
/// discriminators equal to `d` (NULL discriminators are not counted).
/// Fewer than `limit` discriminators are appended if the stream reaches EOF at a granule boundary.
std::vector<size_t> SerializationVariant::deserializeCompactDiscriminators(
|
||||||
|
DB::ColumnPtr & discriminators_column,
|
||||||
|
size_t limit,
|
||||||
|
ReadBuffer * stream,
|
||||||
|
bool continuous_reading,
|
||||||
|
DeserializeBinaryBulkStateVariantDiscriminators & state) const
|
||||||
|
{
|
||||||
|
auto & discriminators = assert_cast<ColumnVariant::ColumnDiscriminators &>(*discriminators_column->assumeMutable());
|
||||||
|
auto & discriminators_data = discriminators.getData();
|
||||||
|
|
||||||
|
/// Reset state if we are reading from the start of the granule and not from the previous position in the file.
|
||||||
|
if (!continuous_reading)
|
||||||
|
state.remaining_rows_in_granule = 0;
|
||||||
|
|
||||||
|
/// Calculate limits for variants during discriminators deserialization.
|
||||||
|
std::vector<size_t> variant_limits(variants.size(), 0);
|
||||||
|
while (limit)
|
||||||
|
{
|
||||||
|
/// If we read all rows from current granule, start reading the next one.
|
||||||
|
if (state.remaining_rows_in_granule == 0)
|
||||||
|
{
|
||||||
|
/// No more granules: return what was counted so far (possibly fewer than requested rows).
if (stream->eof())
|
||||||
|
return variant_limits;
|
||||||
|
|
||||||
|
readDiscriminatorsGranuleStart(state, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read no further than the end of the current granule in this iteration.
size_t limit_in_granule = std::min(limit, state.remaining_rows_in_granule);
|
||||||
|
if (state.granule_format == CompactDiscriminatorsGranuleFormat::COMPACT)
|
||||||
|
{
|
||||||
|
/// `data` refers to the same container as `discriminators_data` above.
/// Nothing is read from the stream here: the single discriminator from the granule header is replicated.
auto & data = discriminators.getData();
|
||||||
|
data.resize_fill(data.size() + limit_in_granule, state.compact_discr);
|
||||||
|
if (state.compact_discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||||
|
variant_limits[state.compact_discr] += limit_in_granule;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// NOTE(review): the counting loop below assumes deserializeBinaryBulk appended exactly
/// limit_in_granule values - confirm behaviour for a truncated stream.
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0);
|
||||||
|
size_t start = discriminators_data.size() - limit_in_granule;
|
||||||
|
for (size_t i = start; i != discriminators_data.size(); ++i)
|
||||||
|
{
|
||||||
|
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||||
|
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||||
|
++variant_limits[discr];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
state.remaining_rows_in_granule -= limit_in_granule;
|
||||||
|
limit -= limit_in_granule;
|
||||||
|
}
|
||||||
|
|
||||||
|
return variant_limits;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads the header of the next discriminators granule from `stream` into `state`:
/// the number of rows in the granule (VarUInt), the granule format (one byte) and,
/// for the COMPACT format only, the single discriminator shared by all rows of the granule.
/// NOTE(review): the format byte is cast to the enum without a range check; any value other
/// than COMPACT is handled by the readers as the plain (row by row) layout - confirm whether
/// unknown values should be rejected here.
void SerializationVariant::readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, DB::ReadBuffer * stream)
|
||||||
|
{
|
||||||
|
UInt64 granule_size;
|
||||||
|
readVarUInt(granule_size, *stream);
|
||||||
|
state.remaining_rows_in_granule = granule_size;
|
||||||
|
UInt8 granule_format;
|
||||||
|
readBinaryLittleEndian(granule_format, *stream);
|
||||||
|
state.granule_format = static_cast<CompactDiscriminatorsGranuleFormat>(granule_format);
|
||||||
|
/// Only the compact format stores the shared discriminator in the header.
if (granule_format == CompactDiscriminatorsGranuleFormat::COMPACT)
|
||||||
|
readBinaryLittleEndian(state.compact_discr, *stream);
|
||||||
|
}
|
||||||
|
|
||||||
void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const
|
void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const
|
||||||
{
|
{
|
||||||
path.push_back(Substream::VariantElement);
|
path.push_back(Substream::VariantElement);
|
||||||
|
@ -2,10 +2,18 @@
|
|||||||
|
|
||||||
#include <DataTypes/Serializations/ISerialization.h>
|
#include <DataTypes/Serializations/ISerialization.h>
|
||||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||||
|
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int INCORRECT_DATA;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Class for serializing/deserializing column with Variant type.
|
/// Class for serializing/deserializing column with Variant type.
|
||||||
/// It supports both text and binary bulk serializations/deserializations.
|
/// It supports both text and binary bulk serializations/deserializations.
|
||||||
///
|
///
|
||||||
@ -18,6 +26,17 @@ namespace DB
|
|||||||
///
|
///
|
||||||
/// During binary bulk serialization it transforms local discriminators
|
/// During binary bulk serialization it transforms local discriminators
|
||||||
/// to global and serializes them into a separate stream VariantDiscriminators.
|
/// to global and serializes them into a separate stream VariantDiscriminators.
|
||||||
|
/// There are 2 modes of serialising discriminators:
|
||||||
|
/// Basic mode, when all discriminators are serialized as is row by row.
|
||||||
|
/// Compact mode, when we avoid writing the same discriminators in granules when there is
|
||||||
|
/// only one variant (or only NULLs) in the granule.
|
||||||
|
/// In compact mode we serialize granules in the following format:
|
||||||
|
/// <number of rows in granule><granule format><granule data>
|
||||||
|
/// There are 2 different formats of granule - plain and compact.
|
||||||
|
/// Plain format is used when there are different discriminators in this granule,
|
||||||
|
/// in this format all discriminators are serialized as is row by row.
|
||||||
|
/// Compact format is used when all discriminators are the same in this granule,
|
||||||
|
/// in this case only this single discriminator is serialized.
|
||||||
/// Each variant is serialized into a separate stream with path VariantElements/VariantElement
|
/// Each variant is serialized into a separate stream with path VariantElements/VariantElement
|
||||||
/// (VariantElements stream is needed for correct sub-columns creation). We store and serialize
|
/// (VariantElements stream is needed for correct sub-columns creation). We store and serialize
|
||||||
/// variants in a sparse form (the size of a variant column equals to the number of its discriminator
|
/// variants in a sparse form (the size of a variant column equals to the number of its discriminator
|
||||||
@ -32,6 +51,25 @@ namespace DB
|
|||||||
class SerializationVariant : public ISerialization
|
class SerializationVariant : public ISerialization
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
/// Mode used to serialize the discriminators column.
/// Wraps the raw numeric mode value and validates it on construction.
struct DiscriminatorsSerializationMode
|
||||||
|
{
|
||||||
|
enum Value
|
||||||
|
{
|
||||||
|
BASIC = 0, /// Store the whole discriminators column.
|
||||||
|
COMPACT = 1, /// Don't write discriminators in granule if all of them are the same.
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Throws INCORRECT_DATA if `mode` is greater than the largest known mode value (COMPACT).
static void checkMode(UInt64 mode)
|
||||||
|
{
|
||||||
|
if (mode > Value::COMPACT)
|
||||||
|
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationVariant discriminators column.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The value is cast first and validated in the constructor body; an invalid mode makes the constructor throw.
explicit DiscriminatorsSerializationMode(UInt64 mode) : value(static_cast<Value>(mode)) { checkMode(mode); }
|
||||||
|
|
||||||
|
Value value;
|
||||||
|
};
|
||||||
|
|
||||||
using VariantSerializations = std::vector<SerializationPtr>;
|
using VariantSerializations = std::vector<SerializationPtr>;
|
||||||
|
|
||||||
explicit SerializationVariant(
|
explicit SerializationVariant(
|
||||||
@ -123,8 +161,44 @@ public:
|
|||||||
static std::vector<size_t> getVariantsDeserializeTextOrder(const DataTypes & variant_types);
|
static std::vector<size_t> getVariantsDeserializeTextOrder(const DataTypes & variant_types);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
friend SerializationVariantElement;
|
||||||
|
friend SerializationVariantElementNullMap;
|
||||||
|
|
||||||
void addVariantElementToPath(SubstreamPath & path, size_t i) const;
|
void addVariantElementToPath(SubstreamPath & path, size_t i) const;
|
||||||
|
|
||||||
|
/// Layout of a single granule of discriminators when the compact serialization mode is used.
/// The numeric values are what readDiscriminatorsGranuleStart reads from the stream as the format byte.
enum CompactDiscriminatorsGranuleFormat
|
||||||
|
{
|
||||||
|
PLAIN = 0, /// Granule has different discriminators and they are serialized as is row by row.
|
||||||
|
COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value.
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Deserialization state of the discriminators stream: the serialization mode plus,
/// for compact mode, the position inside the granule that is currently being read.
struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState
|
||||||
|
{
|
||||||
|
/// `mode_` is converted to DiscriminatorsSerializationMode, whose constructor rejects unknown values.
explicit DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
DiscriminatorsSerializationMode mode;
|
||||||
|
|
||||||
|
/// Deserialize state of currently read granule in compact mode.
|
||||||
|
CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN;
|
||||||
|
/// Rows of the current granule that have not been read yet; 0 means the next read starts with a granule header.
size_t remaining_rows_in_granule = 0;
|
||||||
|
/// Discriminator shared by all rows of the current granule (read only for the COMPACT format).
ColumnVariant::Discriminator compact_discr = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix(
|
||||||
|
DeserializeBinaryBulkSettings & settings,
|
||||||
|
SubstreamsDeserializeStatesCache * cache);
|
||||||
|
|
||||||
|
std::vector<size_t> deserializeCompactDiscriminators(
|
||||||
|
ColumnPtr & discriminators_column,
|
||||||
|
size_t limit,
|
||||||
|
ReadBuffer * stream,
|
||||||
|
bool continuous_reading,
|
||||||
|
DeserializeBinaryBulkStateVariantDiscriminators & state) const;
|
||||||
|
|
||||||
|
static void readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, ReadBuffer * stream);
|
||||||
|
|
||||||
bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||||
bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||||
bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||||
#include <DataTypes/Serializations/SerializationNumber.h>
|
#include <DataTypes/Serializations/SerializationNumber.h>
|
||||||
|
#include <DataTypes/Serializations/SerializationVariant.h>
|
||||||
#include <Columns/ColumnLowCardinality.h>
|
#include <Columns/ColumnLowCardinality.h>
|
||||||
#include <Columns/ColumnNullable.h>
|
#include <Columns/ColumnNullable.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
@ -12,7 +13,7 @@ namespace ErrorCodes
|
|||||||
extern const int NOT_IMPLEMENTED;
|
extern const int NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
|
struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
|
||||||
{
|
{
|
||||||
/// During deserialization discriminators and variant streams can be shared.
|
/// During deserialization discriminators and variant streams can be shared.
|
||||||
/// For example we can read several variant elements together: "select v.UInt32, v.String from table",
|
/// For example we can read several variant elements together: "select v.UInt32, v.String from table",
|
||||||
@ -24,7 +25,7 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria
|
|||||||
/// substream cache correctly.
|
/// substream cache correctly.
|
||||||
ColumnPtr discriminators;
|
ColumnPtr discriminators;
|
||||||
ColumnPtr variant;
|
ColumnPtr variant;
|
||||||
|
ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
|
||||||
ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
|
ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -65,7 +66,12 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary
|
|||||||
void SerializationVariantElement::deserializeBinaryBulkStatePrefix(
|
void SerializationVariantElement::deserializeBinaryBulkStatePrefix(
|
||||||
DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const
|
DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const
|
||||||
{
|
{
|
||||||
|
DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache);
|
||||||
|
if (!discriminators_state)
|
||||||
|
return;
|
||||||
|
|
||||||
auto variant_element_state = std::make_shared<DeserializeBinaryBulkStateVariantElement>();
|
auto variant_element_state = std::make_shared<DeserializeBinaryBulkStateVariantElement>();
|
||||||
|
variant_element_state->discriminators_state = discriminators_state;
|
||||||
|
|
||||||
addVariantToPath(settings.path);
|
addVariantToPath(settings.path);
|
||||||
nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache);
|
nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache);
|
||||||
@ -86,35 +92,61 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
DeserializeBinaryBulkStatePtr & state,
|
DeserializeBinaryBulkStatePtr & state,
|
||||||
SubstreamsCache * cache) const
|
SubstreamsCache * cache) const
|
||||||
{
|
{
|
||||||
auto * variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
|
|
||||||
|
|
||||||
/// First, deserialize discriminators from Variant column.
|
/// First, deserialize discriminators from Variant column.
|
||||||
settings.path.push_back(Substream::VariantDiscriminators);
|
settings.path.push_back(Substream::VariantDiscriminators);
|
||||||
|
|
||||||
|
DeserializeBinaryBulkStateVariantElement * variant_element_state = nullptr;
|
||||||
|
std::optional<size_t> variant_limit;
|
||||||
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
|
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
|
||||||
{
|
{
|
||||||
|
variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
|
||||||
variant_element_state->discriminators = cached_discriminators;
|
variant_element_state->discriminators = cached_discriminators;
|
||||||
}
|
}
|
||||||
else
|
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||||
{
|
{
|
||||||
auto * discriminators_stream = settings.getter(settings.path);
|
variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
|
||||||
if (!discriminators_stream)
|
auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(variant_element_state->discriminators_state);
|
||||||
return;
|
|
||||||
|
|
||||||
/// If we started to read a new column, reinitialize discriminators column in deserialization state.
|
/// If we started to read a new column, reinitialize discriminators column in deserialization state.
|
||||||
if (!variant_element_state->discriminators || result_column->empty())
|
if (!variant_element_state->discriminators || result_column->empty())
|
||||||
variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create();
|
variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create();
|
||||||
|
|
||||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
|
/// Deserialize discriminators according to serialization mode.
|
||||||
|
if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC)
|
||||||
|
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
|
||||||
|
else
|
||||||
|
variant_limit = deserializeCompactDiscriminators(
|
||||||
|
variant_element_state->discriminators,
|
||||||
|
variant_discriminator,
|
||||||
|
limit,
|
||||||
|
discriminators_stream,
|
||||||
|
settings.continuous_reading,
|
||||||
|
variant_element_state->discriminators_state,
|
||||||
|
this);
|
||||||
|
|
||||||
addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators);
|
addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
settings.path.pop_back();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
settings.path.pop_back();
|
settings.path.pop_back();
|
||||||
|
|
||||||
/// Iterate through new discriminators to calculate the limit for our variant.
|
/// We could read less than limit discriminators, but we will need actual number of read rows later.
|
||||||
|
size_t num_new_discriminators = variant_element_state->discriminators->size() - result_column->size();
|
||||||
|
|
||||||
|
/// Iterate through new discriminators to calculate the limit for our variant
|
||||||
|
/// if we didn't do it during discriminators deserialization.
|
||||||
const auto & discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_state->discriminators).getData();
|
const auto & discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_state->discriminators).getData();
|
||||||
size_t discriminators_offset = variant_element_state->discriminators->size() - limit;
|
size_t discriminators_offset = variant_element_state->discriminators->size() - num_new_discriminators;
|
||||||
size_t variant_limit = 0;
|
if (!variant_limit)
|
||||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
{
|
||||||
variant_limit += (discriminators_data[i] == variant_discriminator);
|
variant_limit = 0;
|
||||||
|
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||||
|
*variant_limit += (discriminators_data[i] == variant_discriminator);
|
||||||
|
}
|
||||||
|
|
||||||
/// Now we know the limit for our variant and can deserialize it.
|
/// Now we know the limit for our variant and can deserialize it.
|
||||||
|
|
||||||
@ -125,19 +157,19 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
auto & nullable_column = assert_cast<ColumnNullable &>(*mutable_column);
|
auto & nullable_column = assert_cast<ColumnNullable &>(*mutable_column);
|
||||||
NullMap & null_map = nullable_column.getNullMapData();
|
NullMap & null_map = nullable_column.getNullMapData();
|
||||||
/// If we have only our discriminator in range, fill null map with 0.
|
/// If we have only our discriminator in range, fill null map with 0.
|
||||||
if (variant_limit == limit)
|
if (variant_limit == num_new_discriminators)
|
||||||
{
|
{
|
||||||
null_map.resize_fill(null_map.size() + limit, 0);
|
null_map.resize_fill(null_map.size() + num_new_discriminators, 0);
|
||||||
}
|
}
|
||||||
/// If our discriminator does not occur in the current range, fill null map with 1.
|
/// If our discriminator does not occur in the current range, fill null map with 1.
|
||||||
else if (variant_limit == 0)
|
else if (variant_limit == 0)
|
||||||
{
|
{
|
||||||
null_map.resize_fill(null_map.size() + limit, 1);
|
null_map.resize_fill(null_map.size() + num_new_discriminators, 1);
|
||||||
}
|
}
|
||||||
/// Otherwise we should iterate through discriminators to fill null map.
|
/// Otherwise we should iterate through discriminators to fill null map.
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
null_map.reserve(null_map.size() + limit);
|
null_map.reserve(null_map.size() + num_new_discriminators);
|
||||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||||
null_map.push_back(discriminators_data[i] != variant_discriminator);
|
null_map.push_back(discriminators_data[i] != variant_discriminator);
|
||||||
}
|
}
|
||||||
@ -159,12 +191,12 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
/// If nothing to deserialize, just insert defaults.
|
/// If nothing to deserialize, just insert defaults.
|
||||||
if (variant_limit == 0)
|
if (variant_limit == 0)
|
||||||
{
|
{
|
||||||
mutable_column->insertManyDefaults(limit);
|
mutable_column->insertManyDefaults(num_new_discriminators);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
addVariantToPath(settings.path);
|
addVariantToPath(settings.path);
|
||||||
nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache);
|
nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache);
|
||||||
removeVariantFromPath(settings.path);
|
removeVariantFromPath(settings.path);
|
||||||
|
|
||||||
/// If nothing was deserialized when variant_limit > 0
|
/// If nothing was deserialized when variant_limit > 0
|
||||||
@ -173,16 +205,16 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
/// In this case we should just insert default values.
|
/// In this case we should just insert default values.
|
||||||
if (variant_element_state->variant->empty())
|
if (variant_element_state->variant->empty())
|
||||||
{
|
{
|
||||||
mutable_column->insertManyDefaults(limit);
|
mutable_column->insertManyDefaults(num_new_discriminators);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t variant_offset = variant_element_state->variant->size() - variant_limit;
|
size_t variant_offset = variant_element_state->variant->size() - *variant_limit;
|
||||||
|
|
||||||
/// If we have only our discriminator in range, insert the whole range to result column.
|
/// If we have only our discriminator in range, insert the whole range to result column.
|
||||||
if (variant_limit == limit)
|
if (variant_limit == num_new_discriminators)
|
||||||
{
|
{
|
||||||
mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit);
|
mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, *variant_limit);
|
||||||
}
|
}
|
||||||
/// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator.
|
/// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator.
|
||||||
else
|
else
|
||||||
@ -197,6 +229,59 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reads up to `limit` discriminators stored in compact mode from `stream` and appends them
/// to `discriminators_column` (which is cast to ColumnVariant::ColumnDiscriminators).
///
/// `discriminators_state_` must hold a SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators
/// (checked with checkAndGetState using `serialization`); it keeps the position inside the current
/// granule between calls. If `continuous_reading` is false, the remaining-rows counter is reset
/// so that the next read starts with a granule header.
///
/// Returns the number of appended discriminators equal to `variant_discriminator`.
/// Fewer than `limit` discriminators are appended if the stream reaches EOF at a granule boundary.
size_t SerializationVariantElement::deserializeCompactDiscriminators(
|
||||||
|
DB::ColumnPtr & discriminators_column,
|
||||||
|
ColumnVariant::Discriminator variant_discriminator,
|
||||||
|
size_t limit,
|
||||||
|
DB::ReadBuffer * stream,
|
||||||
|
bool continuous_reading,
|
||||||
|
DeserializeBinaryBulkStatePtr & discriminators_state_,
|
||||||
|
const ISerialization * serialization)
|
||||||
|
{
|
||||||
|
auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(discriminators_state_, serialization);
|
||||||
|
auto & discriminators = assert_cast<ColumnVariant::ColumnDiscriminators &>(*discriminators_column->assumeMutable());
|
||||||
|
auto & discriminators_data = discriminators.getData();
|
||||||
|
|
||||||
|
/// Reset state if we are reading from the start of the granule and not from the previous position in the file.
|
||||||
|
if (!continuous_reading)
|
||||||
|
discriminators_state->remaining_rows_in_granule = 0;
|
||||||
|
|
||||||
|
/// Calculate our variant limit during discriminators deserialization.
|
||||||
|
size_t variant_limit = 0;
|
||||||
|
while (limit)
|
||||||
|
{
|
||||||
|
/// If we read all rows from current granule, start reading the next one.
|
||||||
|
if (discriminators_state->remaining_rows_in_granule == 0)
|
||||||
|
{
|
||||||
|
/// No more granules: return what was counted so far (possibly fewer than requested rows).
if (stream->eof())
|
||||||
|
return variant_limit;
|
||||||
|
|
||||||
|
SerializationVariant::readDiscriminatorsGranuleStart(*discriminators_state, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read no further than the end of the current granule in this iteration.
size_t limit_in_granule = std::min(limit, discriminators_state->remaining_rows_in_granule);
|
||||||
|
if (discriminators_state->granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT)
|
||||||
|
{
|
||||||
|
/// `data` refers to the same container as `discriminators_data` above.
/// Nothing is read from the stream here: the single discriminator from the granule header is replicated.
auto & data = discriminators.getData();
|
||||||
|
data.resize_fill(data.size() + limit_in_granule, discriminators_state->compact_discr);
|
||||||
|
if (discriminators_state->compact_discr == variant_discriminator)
|
||||||
|
variant_limit += limit_in_granule;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// NOTE(review): the counting loop below assumes deserializeBinaryBulk appended exactly
/// limit_in_granule values - confirm behaviour for a truncated stream.
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0);
|
||||||
|
size_t start = discriminators_data.size() - limit_in_granule;
|
||||||
|
for (size_t i = start; i != discriminators_data.size(); ++i)
|
||||||
|
variant_limit += (discriminators_data[i] == variant_discriminator);
|
||||||
|
}
|
||||||
|
|
||||||
|
discriminators_state->remaining_rows_in_granule -= limit_in_granule;
|
||||||
|
limit -= limit_in_granule;
|
||||||
|
}
|
||||||
|
|
||||||
|
return variant_limit;
|
||||||
|
}
|
||||||
|
|
||||||
void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const
|
void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const
|
||||||
{
|
{
|
||||||
path.push_back(Substream::VariantElements);
|
path.push_back(Substream::VariantElements);
|
||||||
@ -214,17 +299,19 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator(
|
|||||||
const ColumnPtr & local_discriminators_,
|
const ColumnPtr & local_discriminators_,
|
||||||
const String & variant_element_name_,
|
const String & variant_element_name_,
|
||||||
ColumnVariant::Discriminator global_variant_discriminator_,
|
ColumnVariant::Discriminator global_variant_discriminator_,
|
||||||
ColumnVariant::Discriminator local_variant_discriminator_)
|
ColumnVariant::Discriminator local_variant_discriminator_,
|
||||||
|
bool make_nullable_)
|
||||||
: local_discriminators(local_discriminators_)
|
: local_discriminators(local_discriminators_)
|
||||||
, variant_element_name(variant_element_name_)
|
, variant_element_name(variant_element_name_)
|
||||||
, global_variant_discriminator(global_variant_discriminator_)
|
, global_variant_discriminator(global_variant_discriminator_)
|
||||||
, local_variant_discriminator(local_variant_discriminator_)
|
, local_variant_discriminator(local_variant_discriminator_)
|
||||||
|
, make_nullable(make_nullable_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const
|
DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const
|
||||||
{
|
{
|
||||||
return makeNullableOrLowCardinalityNullableSafe(prev);
|
return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev;
|
||||||
}
|
}
|
||||||
|
|
||||||
SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const
|
SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const
|
||||||
@ -237,12 +324,12 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB:
|
|||||||
/// Case when original Variant column contained only one non-empty variant and no NULLs.
|
/// Case when original Variant column contained only one non-empty variant and no NULLs.
|
||||||
/// In this case just use this variant.
|
/// In this case just use this variant.
|
||||||
if (prev->size() == local_discriminators->size())
|
if (prev->size() == local_discriminators->size())
|
||||||
return makeNullableOrLowCardinalityNullableSafe(prev);
|
return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev;
|
||||||
|
|
||||||
/// If this variant is empty, fill result column with default values.
|
/// If this variant is empty, fill result column with default values.
|
||||||
if (prev->empty())
|
if (prev->empty())
|
||||||
{
|
{
|
||||||
auto res = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty();
|
auto res = make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty() : prev->cloneEmpty();
|
||||||
res->insertManyDefaults(local_discriminators->size());
|
res->insertManyDefaults(local_discriminators->size());
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -257,16 +344,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB:
|
|||||||
/// Now we can create new column from null-map and variant column using IColumn::expand.
|
/// Now we can create new column from null-map and variant column using IColumn::expand.
|
||||||
auto res_column = IColumn::mutate(prev);
|
auto res_column = IColumn::mutate(prev);
|
||||||
|
|
||||||
/// Special case for LowCardinality. We want the result to be LowCardinality(Nullable),
|
/// Special case for LowCardinality when we want the result to be LowCardinality(Nullable),
|
||||||
/// but we don't have a good way to apply null-mask for LowCardinality(), so, we first
|
/// but we don't have a good way to apply null-mask for LowCardinality(), so, we first
|
||||||
/// convert our column to LowCardinality(Nullable()) and then use expand which will
|
/// convert our column to LowCardinality(Nullable()) and then use expand which will
|
||||||
/// fill rows with 0 in mask with default value (that is NULL).
|
/// fill rows with 0 in mask with default value (that is NULL).
|
||||||
if (prev->lowCardinality())
|
if (make_nullable && prev->lowCardinality())
|
||||||
res_column = assert_cast<ColumnLowCardinality &>(*res_column).cloneNullable();
|
res_column = assert_cast<ColumnLowCardinality &>(*res_column).cloneNullable();
|
||||||
|
|
||||||
res_column->expand(null_map, /*inverted = */ true);
|
res_column->expand(null_map, /*inverted = */ true);
|
||||||
|
|
||||||
if (res_column->canBeInsideNullable())
|
if (make_nullable && prev->canBeInsideNullable())
|
||||||
{
|
{
|
||||||
auto null_map_col = ColumnUInt8::create();
|
auto null_map_col = ColumnUInt8::create();
|
||||||
null_map_col->getData() = std::move(null_map);
|
null_map_col->getData() = std::move(null_map);
|
||||||
|
@ -9,6 +9,7 @@ namespace DB
|
|||||||
{
|
{
|
||||||
|
|
||||||
class SerializationVariant;
|
class SerializationVariant;
|
||||||
|
class SerializationVariantElementNullMap;
|
||||||
|
|
||||||
/// Serialization for Variant element when we read it as a subcolumn.
|
/// Serialization for Variant element when we read it as a subcolumn.
|
||||||
class SerializationVariantElement final : public SerializationWrapper
|
class SerializationVariantElement final : public SerializationWrapper
|
||||||
@ -66,12 +67,14 @@ public:
|
|||||||
const String variant_element_name;
|
const String variant_element_name;
|
||||||
const ColumnVariant::Discriminator global_variant_discriminator;
|
const ColumnVariant::Discriminator global_variant_discriminator;
|
||||||
const ColumnVariant::Discriminator local_variant_discriminator;
|
const ColumnVariant::Discriminator local_variant_discriminator;
|
||||||
|
bool make_nullable;
|
||||||
|
|
||||||
VariantSubcolumnCreator(
|
VariantSubcolumnCreator(
|
||||||
const ColumnPtr & local_discriminators_,
|
const ColumnPtr & local_discriminators_,
|
||||||
const String & variant_element_name_,
|
const String & variant_element_name_,
|
||||||
ColumnVariant::Discriminator global_variant_discriminator_,
|
ColumnVariant::Discriminator global_variant_discriminator_,
|
||||||
ColumnVariant::Discriminator local_variant_discriminator_);
|
ColumnVariant::Discriminator local_variant_discriminator_,
|
||||||
|
bool make_nullable_);
|
||||||
|
|
||||||
DataTypePtr create(const DataTypePtr & prev) const override;
|
DataTypePtr create(const DataTypePtr & prev) const override;
|
||||||
ColumnPtr create(const ColumnPtr & prev) const override;
|
ColumnPtr create(const ColumnPtr & prev) const override;
|
||||||
@ -79,6 +82,18 @@ public:
|
|||||||
};
|
};
|
||||||
private:
|
private:
|
||||||
friend SerializationVariant;
|
friend SerializationVariant;
|
||||||
|
friend SerializationVariantElementNullMap;
|
||||||
|
|
||||||
|
struct DeserializeBinaryBulkStateVariantElement;
|
||||||
|
|
||||||
|
static size_t deserializeCompactDiscriminators(
|
||||||
|
ColumnPtr & discriminators_column,
|
||||||
|
ColumnVariant::Discriminator variant_discriminator,
|
||||||
|
size_t limit,
|
||||||
|
ReadBuffer * stream,
|
||||||
|
bool continuous_reading,
|
||||||
|
DeserializeBinaryBulkStatePtr & discriminators_state_,
|
||||||
|
const ISerialization * serialization);
|
||||||
|
|
||||||
void addVariantToPath(SubstreamPath & path) const;
|
void addVariantToPath(SubstreamPath & path) const;
|
||||||
void removeVariantFromPath(SubstreamPath & path) const;
|
void removeVariantFromPath(SubstreamPath & path) const;
|
||||||
|
@ -0,0 +1,190 @@
|
|||||||
|
#include <Columns/ColumnLowCardinality.h>
|
||||||
|
#include <Columns/ColumnNullable.h>
|
||||||
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
|
#include <DataTypes/Serializations/SerializationNumber.h>
|
||||||
|
#include <DataTypes/Serializations/SerializationVariant.h>
|
||||||
|
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||||
|
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||||
|
#include <IO/ReadHelpers.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DeserializeBinaryBulkStateVariantElementNullMap : public ISerialization::DeserializeBinaryBulkState
|
||||||
|
{
|
||||||
|
/// During deserialization discriminators streams can be shared.
|
||||||
|
/// For example we can read several variant elements together: "select v.UInt32, v.String.null from table",
|
||||||
|
/// or we can read the whole variant and some of variant elements or their subcolumns: "select v, v.UInt32.null from table".
|
||||||
|
/// To read the same column from the same stream more than once we use substream cache,
|
||||||
|
/// but this cache stores the whole column, not only the current range.
|
||||||
|
/// During deserialization of variant elements or their subcolumns discriminators column is not stored
|
||||||
|
/// in the result column, so we need to store them inside deserialization state, so we can use
|
||||||
|
/// substream cache correctly.
|
||||||
|
ColumnPtr discriminators;
|
||||||
|
ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Reports the single substream this serialization reads: the Variant discriminators.
/// The path in `settings` is extended for the duration of the callback and restored afterwards.
void SerializationVariantElementNullMap::enumerateStreams(
    DB::ISerialization::EnumerateStreamsSettings & settings,
    const DB::ISerialization::StreamCallback & callback,
    const DB::ISerialization::SubstreamData & /*data*/) const
{
    auto & path = settings.path;

    /// The discriminators stream is the one needed later during deserialization.
    path.push_back(Substream::VariantDiscriminators);
    callback(path);
    path.pop_back();
}
|
||||||
|
|
||||||
|
void SerializationVariantElementNullMap::serializeBinaryBulkStatePrefix(
|
||||||
|
const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||||
|
{
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElementNullMap");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serialization is not supported by this class; always throws NOT_IMPLEMENTED.
void SerializationVariantElementNullMap::serializeBinaryBulkStateSuffix(
    SerializeBinaryBulkSettings & /*settings*/,
    SerializeBinaryBulkStatePtr & /*state*/) const
{
    throw Exception(
        ErrorCodes::NOT_IMPLEMENTED,
        "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElementNullMap");
}
|
||||||
|
|
||||||
|
/// Prepares the deserialization state for reading the null map.
/// The discriminators prefix state is obtained from SerializationVariant; when it is empty,
/// `state` is left untouched. Otherwise `state` receives a fresh
/// DeserializeBinaryBulkStateVariantElementNullMap that owns the discriminators state.
void SerializationVariantElementNullMap::deserializeBinaryBulkStatePrefix(
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state,
    SubstreamsDeserializeStatesCache * cache) const
{
    if (DeserializeBinaryBulkStatePtr prefix_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache))
    {
        auto null_map_state = std::make_shared<DeserializeBinaryBulkStateVariantElementNullMap>();
        null_map_state->discriminators_state = std::move(prefix_state);
        state = std::move(null_map_state);
    }
}
|
||||||
|
|
||||||
|
void SerializationVariantElementNullMap::serializeBinaryBulkWithMultipleStreams(
|
||||||
|
const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||||
|
{
|
||||||
|
throw Exception(
|
||||||
|
ErrorCodes::NOT_IMPLEMENTED,
|
||||||
|
"Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElementNullMap");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Appends null-map values (UInt8) for the next `limit` rows to `result_column`.
/// A value of 1 is written for rows whose discriminator differs from `variant_discriminator`
/// and 0 for rows where it matches.
/// Discriminators come from the substreams cache when present, otherwise from the
/// discriminators stream; when neither exists, the null map is filled with 1.
void SerializationVariantElementNullMap::deserializeBinaryBulkWithMultipleStreams(
    ColumnPtr & result_column,
    size_t limit,
    DeserializeBinaryBulkSettings & settings,
    DeserializeBinaryBulkStatePtr & state,
    SubstreamsCache * cache) const
{
    /// Only the discriminators substream of the Variant column is read here.
    settings.path.push_back(Substream::VariantDiscriminators);

    DeserializeBinaryBulkStateVariantElementNullMap * null_map_state = nullptr;
    /// Filled only in the non-BASIC mode with the value returned by deserializeCompactDiscriminators.
    /// NOTE(review): presumably the number of rows of our variant in the read range - confirm.
    std::optional<size_t> variant_limit;

    if (auto discriminators_from_cache = getFromSubstreamsCache(cache, settings.path))
    {
        /// Somebody has already read these discriminators, reuse the cached column.
        null_map_state = checkAndGetState<DeserializeBinaryBulkStateVariantElementNullMap>(state);
        null_map_state->discriminators = discriminators_from_cache;
    }
    else if (auto * stream = settings.getter(settings.path))
    {
        null_map_state = checkAndGetState<DeserializeBinaryBulkStateVariantElementNullMap>(state);
        auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(
            null_map_state->discriminators_state);

        /// A new column is being read: start with a fresh discriminators column in the state.
        const bool need_new_discriminators = !null_map_state->discriminators || result_column->empty();
        if (need_new_discriminators)
            null_map_state->discriminators = ColumnVariant::ColumnDiscriminators::create();

        /// The way discriminators are read depends on the serialization mode.
        if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC)
        {
            SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(
                *null_map_state->discriminators->assumeMutable(), *stream, limit, 0);
        }
        else
        {
            variant_limit = SerializationVariantElement::deserializeCompactDiscriminators(
                null_map_state->discriminators,
                variant_discriminator,
                limit,
                stream,
                settings.continuous_reading,
                null_map_state->discriminators_state,
                this);
        }

        addToSubstreamsCache(cache, settings.path, null_map_state->discriminators);
    }
    else
    {
        /// Neither a stream nor cached data: the part has no Variant column at all
        /// (this can happen after alter table add column).
        /// Columns are normally filled with defaults in that case, but the default
        /// for a null map must be 1 rather than 0, so the column is filled with 1 here.
        MutableColumnPtr null_map_column = result_column->assumeMutable();
        auto & null_map = assert_cast<ColumnUInt8 &>(*null_map_column).getData();
        null_map.resize_fill(null_map.size() + limit, 1);
        settings.path.pop_back();
        return;
    }

    settings.path.pop_back();

    MutableColumnPtr null_map_column = result_column->assumeMutable();
    auto & null_map = assert_cast<ColumnUInt8 &>(*null_map_column).getData();

    /// NOTE(review): everything below treats `limit` as the exact number of new rows;
    /// confirm that fewer discriminators than `limit` can never be read here.
    if (variant_limit.has_value() && *variant_limit == 0)
    {
        /// Our variant does not occur in the read range: every row is NULL for it.
        null_map.resize_fill(null_map.size() + limit, 1);
    }
    else if (variant_limit.has_value() && *variant_limit == limit)
    {
        /// The read range consists of our variant only: no NULLs.
        null_map.resize_fill(null_map.size() + limit, 0);
    }
    else
    {
        /// Walk over the last `limit` discriminators and derive the null map from them.
        const auto & discriminators_data
            = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*null_map_state->discriminators).getData();
        size_t first_new_row = null_map_state->discriminators->size() - limit;
        for (size_t row = first_new_row; row != discriminators_data.size(); ++row)
            null_map.push_back(discriminators_data[row] != variant_discriminator);
    }
}
|
||||||
|
|
||||||
|
/// Stores everything needed to later build the null-map type, serialization and column:
/// the local discriminators column, the variant element name and both discriminators of the variant.
SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::VariantNullMapSubcolumnCreator(
    const ColumnPtr & local_discriminators_,
    const String & variant_element_name_,
    ColumnVariant::Discriminator global_variant_discriminator_,
    ColumnVariant::Discriminator local_variant_discriminator_)
    : local_discriminators(local_discriminators_),
      variant_element_name(variant_element_name_),
      global_variant_discriminator(global_variant_discriminator_),
      local_variant_discriminator(local_variant_discriminator_)
{
}
|
||||||
|
|
||||||
|
/// The null-map subcolumn always has type UInt8; the previous type is ignored.
DataTypePtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::DataTypePtr & /*prev*/) const
{
    auto null_map_type = std::make_shared<DataTypeUInt8>();
    return null_map_type;
}
|
||||||
|
|
||||||
|
/// Builds the null-map serialization from the variant element name and the global discriminator;
/// the previous serialization is ignored.
SerializationPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::SerializationPtr & /*prev*/) const
{
    auto null_map_serialization
        = std::make_shared<SerializationVariantElementNullMap>(variant_element_name, global_variant_discriminator);
    return null_map_serialization;
}
|
||||||
|
|
||||||
|
/// Builds the null-map column from the stored local discriminators; the previous column is ignored.
/// The result has one UInt8 per discriminator: 1 where it differs from our local discriminator, 0 where it matches.
ColumnPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::ColumnPtr & /*prev*/) const
{
    auto result = ColumnUInt8::create();
    auto & null_map = result->getData();
    null_map.reserve(local_discriminators->size());

    const auto & discriminators = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*local_discriminators).getData();
    for (size_t row = 0; row != discriminators.size(); ++row)
        null_map.push_back(discriminators[row] != local_variant_discriminator);

    return result;
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,107 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <DataTypes/Serializations/SimpleTextSerialization.h>
|
||||||
|
#include <DataTypes/DataTypeNullable.h>
|
||||||
|
#include <Columns/ColumnNullable.h>
|
||||||
|
#include <Columns/ColumnVariant.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int NOT_IMPLEMENTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
class SerializationVariant;
|
||||||
|
class SerializationVariantElement;
|
||||||
|
|
||||||
|
/// Serialization for Variant element null map when we read it as a subcolumn.
|
||||||
|
/// For example, variant.UInt64.null.
|
||||||
|
/// It requires separate serialization because there is no actual Nullable column
|
||||||
|
/// and we should construct null map from variant discriminators.
|
||||||
|
/// The implementation of deserializeBinaryBulk* methods is similar to SerializationVariantElement,
|
||||||
|
/// but differs in that there is no need to read the actual data of the variant, only discriminators.
|
||||||
|
class SerializationVariantElementNullMap final : public SimpleTextSerialization
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SerializationVariantElementNullMap(const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_)
|
||||||
|
: variant_element_name(variant_element_name_), variant_discriminator(variant_discriminator_)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void enumerateStreams(
|
||||||
|
EnumerateStreamsSettings & settings,
|
||||||
|
const StreamCallback & callback,
|
||||||
|
const SubstreamData & data) const override;
|
||||||
|
|
||||||
|
void serializeBinaryBulkStatePrefix(
|
||||||
|
const IColumn & column,
|
||||||
|
SerializeBinaryBulkSettings & settings,
|
||||||
|
SerializeBinaryBulkStatePtr & state) const override;
|
||||||
|
|
||||||
|
void serializeBinaryBulkStateSuffix(
|
||||||
|
SerializeBinaryBulkSettings & settings,
|
||||||
|
SerializeBinaryBulkStatePtr & state) const override;
|
||||||
|
|
||||||
|
void deserializeBinaryBulkStatePrefix(
|
||||||
|
DeserializeBinaryBulkSettings & settings,
|
||||||
|
DeserializeBinaryBulkStatePtr & state,
|
||||||
|
SubstreamsDeserializeStatesCache * cache) const override;
|
||||||
|
|
||||||
|
void serializeBinaryBulkWithMultipleStreams(
|
||||||
|
const IColumn & column,
|
||||||
|
size_t offset,
|
||||||
|
size_t limit,
|
||||||
|
SerializeBinaryBulkSettings & settings,
|
||||||
|
SerializeBinaryBulkStatePtr & state) const override;
|
||||||
|
|
||||||
|
void deserializeBinaryBulkWithMultipleStreams(
|
||||||
|
ColumnPtr & column,
|
||||||
|
size_t limit,
|
||||||
|
DeserializeBinaryBulkSettings & settings,
|
||||||
|
DeserializeBinaryBulkStatePtr & state,
|
||||||
|
SubstreamsCache * cache) const override;
|
||||||
|
|
||||||
|
void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||||
|
void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||||
|
void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||||
|
void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||||
|
void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
|
||||||
|
void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); }
|
||||||
|
bool tryDeserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); }
|
||||||
|
|
||||||
|
struct VariantNullMapSubcolumnCreator : public ISubcolumnCreator
|
||||||
|
{
|
||||||
|
const ColumnPtr local_discriminators;
|
||||||
|
const String variant_element_name;
|
||||||
|
const ColumnVariant::Discriminator global_variant_discriminator;
|
||||||
|
const ColumnVariant::Discriminator local_variant_discriminator;
|
||||||
|
|
||||||
|
VariantNullMapSubcolumnCreator(
|
||||||
|
const ColumnPtr & local_discriminators_,
|
||||||
|
const String & variant_element_name_,
|
||||||
|
ColumnVariant::Discriminator global_variant_discriminator_,
|
||||||
|
ColumnVariant::Discriminator local_variant_discriminator_);
|
||||||
|
|
||||||
|
DataTypePtr create(const DataTypePtr & prev) const override;
|
||||||
|
ColumnPtr create(const ColumnPtr & prev) const override;
|
||||||
|
SerializationPtr create(const SerializationPtr & prev) const override;
|
||||||
|
};
|
||||||
|
private:
|
||||||
|
[[noreturn]] static void throwNoSerialization()
|
||||||
|
{
|
||||||
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for variant element null map subcolumn");
|
||||||
|
}
|
||||||
|
|
||||||
|
friend SerializationVariant;
|
||||||
|
friend SerializationVariantElement;
|
||||||
|
|
||||||
|
/// To be able to deserialize Variant element null map as a subcolumn
|
||||||
|
/// we need variant element type name and global discriminator.
|
||||||
|
String variant_element_name;
|
||||||
|
ColumnVariant::Discriminator variant_discriminator;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -3,14 +3,14 @@
|
|||||||
#include <Columns/ColumnString.h>
|
#include <Columns/ColumnString.h>
|
||||||
#include <Columns/ColumnVector.h>
|
#include <Columns/ColumnVector.h>
|
||||||
#include <Columns/ColumnsNumber.h>
|
#include <Columns/ColumnsNumber.h>
|
||||||
#include <Common/BitHelpers.h>
|
|
||||||
#include <Common/BinStringDecodeHelper.h>
|
|
||||||
#include <DataTypes/DataTypeString.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
#include <Functions/FunctionFactory.h>
|
#include <Functions/FunctionFactory.h>
|
||||||
#include <Functions/IFunction.h>
|
#include <Functions/IFunction.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <Interpreters/Context_fwd.h>
|
#include <Interpreters/Context_fwd.h>
|
||||||
#include <Interpreters/castColumn.h>
|
#include <Interpreters/castColumn.h>
|
||||||
|
#include <Common/BinStringDecodeHelper.h>
|
||||||
|
#include <Common/BitHelpers.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -218,10 +218,7 @@ struct UnbinImpl
|
|||||||
static constexpr auto name = "unbin";
|
static constexpr auto name = "unbin";
|
||||||
static constexpr size_t word_size = 8;
|
static constexpr size_t word_size = 8;
|
||||||
|
|
||||||
static void decode(const char * pos, const char * end, char *& out)
|
static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out, word_size); }
|
||||||
{
|
|
||||||
binStringDecode(pos, end, out);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Encode number or string to string with binary or hexadecimal representation
|
/// Encode number or string to string with binary or hexadecimal representation
|
||||||
@ -651,7 +648,15 @@ public:
|
|||||||
|
|
||||||
size_t size = in_offsets.size();
|
size_t size = in_offsets.size();
|
||||||
out_offsets.resize(size);
|
out_offsets.resize(size);
|
||||||
out_vec.resize(in_vec.size() / word_size + size);
|
|
||||||
|
size_t max_out_len = 0;
|
||||||
|
for (size_t i = 0; i < in_offsets.size(); ++i)
|
||||||
|
{
|
||||||
|
const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1])
|
||||||
|
- /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1;
|
||||||
|
max_out_len += (len + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1;
|
||||||
|
}
|
||||||
|
out_vec.resize(max_out_len);
|
||||||
|
|
||||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||||
char * pos = begin;
|
char * pos = begin;
|
||||||
@ -661,6 +666,7 @@ public:
|
|||||||
{
|
{
|
||||||
size_t new_offset = in_offsets[i];
|
size_t new_offset = in_offsets[i];
|
||||||
|
|
||||||
|
/// `new_offset - 1` because in ColumnString each string is stored with trailing zero byte
|
||||||
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset - 1]), pos);
|
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset - 1]), pos);
|
||||||
|
|
||||||
out_offsets[i] = pos - begin;
|
out_offsets[i] = pos - begin;
|
||||||
@ -668,6 +674,9 @@ public:
|
|||||||
prev_offset = new_offset;
|
prev_offset = new_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
chassert(
|
||||||
|
static_cast<size_t>(pos - begin) <= out_vec.size(),
|
||||||
|
fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size()));
|
||||||
out_vec.resize(pos - begin);
|
out_vec.resize(pos - begin);
|
||||||
|
|
||||||
return col_res;
|
return col_res;
|
||||||
@ -680,11 +689,11 @@ public:
|
|||||||
ColumnString::Offsets & out_offsets = col_res->getOffsets();
|
ColumnString::Offsets & out_offsets = col_res->getOffsets();
|
||||||
|
|
||||||
const ColumnString::Chars & in_vec = col_fix_string->getChars();
|
const ColumnString::Chars & in_vec = col_fix_string->getChars();
|
||||||
size_t n = col_fix_string->getN();
|
const size_t n = col_fix_string->getN();
|
||||||
|
|
||||||
size_t size = col_fix_string->size();
|
size_t size = col_fix_string->size();
|
||||||
out_offsets.resize(size);
|
out_offsets.resize(size);
|
||||||
out_vec.resize(in_vec.size() / word_size + size);
|
out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size);
|
||||||
|
|
||||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||||
char * pos = begin;
|
char * pos = begin;
|
||||||
@ -694,6 +703,7 @@ public:
|
|||||||
{
|
{
|
||||||
size_t new_offset = prev_offset + n;
|
size_t new_offset = prev_offset + n;
|
||||||
|
|
||||||
|
/// here we don't subtract 1 from `new_offset` because in ColumnFixedString strings are stored without trailing zero byte
|
||||||
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset]), pos);
|
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset]), pos);
|
||||||
|
|
||||||
out_offsets[i] = pos - begin;
|
out_offsets[i] = pos - begin;
|
||||||
@ -701,6 +711,9 @@ public:
|
|||||||
prev_offset = new_offset;
|
prev_offset = new_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
chassert(
|
||||||
|
static_cast<size_t>(pos - begin) <= out_vec.size(),
|
||||||
|
fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size()));
|
||||||
out_vec.resize(pos - begin);
|
out_vec.resize(pos - begin);
|
||||||
|
|
||||||
return col_res;
|
return col_res;
|
||||||
|
@ -44,7 +44,7 @@ struct Memory : boost::noncopyable, Allocator
|
|||||||
char * m_data = nullptr;
|
char * m_data = nullptr;
|
||||||
size_t alignment = 0;
|
size_t alignment = 0;
|
||||||
|
|
||||||
[[maybe_unused]] bool allow_gwp_asan_force_sample;
|
[[maybe_unused]] bool allow_gwp_asan_force_sample{false};
|
||||||
|
|
||||||
Memory() = default;
|
Memory() = default;
|
||||||
|
|
||||||
|
@ -301,13 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const
|
|||||||
auto & insert_query = query->as<ASTInsertQuery &>();
|
auto & insert_query = query->as<ASTInsertQuery &>();
|
||||||
insert_query.async_insert_flush = true;
|
insert_query.async_insert_flush = true;
|
||||||
|
|
||||||
InterpreterInsertQuery interpreter(
|
InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns);
|
||||||
query,
|
|
||||||
query_context,
|
|
||||||
query_context->getSettingsRef().insert_allow_materialized_columns,
|
|
||||||
/* no_squash */ false,
|
|
||||||
/* no_destination */ false,
|
|
||||||
/* async_insert */ false);
|
|
||||||
auto table = interpreter.getTable(insert_query);
|
auto table = interpreter.getTable(insert_query);
|
||||||
auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context);
|
auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context);
|
||||||
|
|
||||||
@ -787,12 +781,7 @@ try
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
interpreter = std::make_unique<InterpreterInsertQuery>(
|
interpreter = std::make_unique<InterpreterInsertQuery>(
|
||||||
key.query,
|
key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true);
|
||||||
insert_context,
|
|
||||||
key.settings.insert_allow_materialized_columns,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
true);
|
|
||||||
|
|
||||||
pipeline = interpreter->execute().pipeline;
|
pipeline = interpreter->execute().pipeline;
|
||||||
chassert(pipeline.pushing());
|
chassert(pipeline.pushing());
|
||||||
@ -1011,7 +1000,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Chunk chunk(executor.getResultColumns(), total_rows);
|
Chunk chunk(executor.getResultColumns(), total_rows);
|
||||||
chunk.getChunkInfos().add(std::move(chunk_info));
|
chunk.setChunkInfo(std::move(chunk_info));
|
||||||
return chunk;
|
return chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1063,7 +1052,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Chunk chunk(std::move(result_columns), total_rows);
|
Chunk chunk(std::move(result_columns), total_rows);
|
||||||
chunk.getChunkInfos().add(std::move(chunk_info));
|
chunk.setChunkInfo(std::move(chunk_info));
|
||||||
return chunk;
|
return chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
#include <Interpreters/InterpreterFactory.h>
|
#include <Interpreters/InterpreterFactory.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
#include <Access/Common/AccessFlags.h>
|
#include <Access/Common/AccessFlags.h>
|
||||||
|
|
||||||
@ -23,7 +22,6 @@
|
|||||||
#include <Parsers/ASTCheckQuery.h>
|
#include <Parsers/ASTCheckQuery.h>
|
||||||
#include <Parsers/ASTSetQuery.h>
|
#include <Parsers/ASTSetQuery.h>
|
||||||
|
|
||||||
#include <Processors/Chunk.h>
|
|
||||||
#include <Processors/IAccumulatingTransform.h>
|
#include <Processors/IAccumulatingTransform.h>
|
||||||
#include <Processors/IInflatingTransform.h>
|
#include <Processors/IInflatingTransform.h>
|
||||||
#include <Processors/ISimpleTransform.h>
|
#include <Processors/ISimpleTransform.h>
|
||||||
@ -93,7 +91,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con
|
|||||||
return Chunk(std::move(columns), 1);
|
return Chunk(std::move(columns), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
class TableCheckTask : public ChunkInfoCloneable<TableCheckTask>
|
class TableCheckTask : public ChunkInfo
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
TableCheckTask(StorageID table_id, const std::variant<std::monostate, ASTPtr, String> & partition_or_part, ContextPtr context)
|
TableCheckTask(StorageID table_id, const std::variant<std::monostate, ASTPtr, String> & partition_or_part, ContextPtr context)
|
||||||
@ -112,12 +110,6 @@ public:
|
|||||||
context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID());
|
context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID());
|
||||||
}
|
}
|
||||||
|
|
||||||
TableCheckTask(const TableCheckTask & other)
|
|
||||||
: table(other.table)
|
|
||||||
, check_data_tasks(other.check_data_tasks)
|
|
||||||
, is_finished(other.is_finished.load())
|
|
||||||
{}
|
|
||||||
|
|
||||||
std::optional<CheckResult> checkNext() const
|
std::optional<CheckResult> checkNext() const
|
||||||
{
|
{
|
||||||
if (isFinished())
|
if (isFinished())
|
||||||
@ -129,8 +121,8 @@ public:
|
|||||||
std::this_thread::sleep_for(sleep_time);
|
std::this_thread::sleep_for(sleep_time);
|
||||||
});
|
});
|
||||||
|
|
||||||
IStorage::DataValidationTasksPtr tmp = check_data_tasks;
|
IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks;
|
||||||
auto result = table->checkDataNext(tmp);
|
auto result = table->checkDataNext(check_data_tasks_);
|
||||||
is_finished = !result.has_value();
|
is_finished = !result.has_value();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -188,7 +180,7 @@ protected:
|
|||||||
/// source should return at least one row to start pipeline
|
/// source should return at least one row to start pipeline
|
||||||
result.addColumn(ColumnUInt8::create(1, 1));
|
result.addColumn(ColumnUInt8::create(1, 1));
|
||||||
/// actual data stored in chunk info
|
/// actual data stored in chunk info
|
||||||
result.getChunkInfos().add(std::move(current_check_task));
|
result.setChunkInfo(std::move(current_check_task));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -288,7 +280,7 @@ public:
|
|||||||
protected:
|
protected:
|
||||||
void transform(Chunk & chunk) override
|
void transform(Chunk & chunk) override
|
||||||
{
|
{
|
||||||
auto table_check_task = chunk.getChunkInfos().get<TableCheckTask>();
|
auto table_check_task = std::dynamic_pointer_cast<const TableCheckTask>(chunk.getChunkInfo());
|
||||||
auto check_result = table_check_task->checkNext();
|
auto check_result = table_check_task->checkNext();
|
||||||
if (!check_result)
|
if (!check_result)
|
||||||
{
|
{
|
||||||
|
@ -1776,13 +1776,8 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create)
|
|||||||
else
|
else
|
||||||
insert->select = create.select->clone();
|
insert->select = create.select->clone();
|
||||||
|
|
||||||
return InterpreterInsertQuery(
|
return InterpreterInsertQuery(insert, getContext(),
|
||||||
insert,
|
getContext()->getSettingsRef().insert_allow_materialized_columns).execute();
|
||||||
getContext(),
|
|
||||||
getContext()->getSettingsRef().insert_allow_materialized_columns,
|
|
||||||
/* no_squash */ false,
|
|
||||||
/* no_destination */ false,
|
|
||||||
/* async_isnert */ false).execute();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
|
@ -534,13 +534,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
|
|||||||
}
|
}
|
||||||
else if (dynamic_cast<const ASTInsertQuery *>(ast.getExplainedQuery().get()))
|
else if (dynamic_cast<const ASTInsertQuery *>(ast.getExplainedQuery().get()))
|
||||||
{
|
{
|
||||||
InterpreterInsertQuery insert(
|
InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext());
|
||||||
ast.getExplainedQuery(),
|
|
||||||
getContext(),
|
|
||||||
/* allow_materialized */ false,
|
|
||||||
/* no_squash */ false,
|
|
||||||
/* no_destination */ false,
|
|
||||||
/* async_isnert */ false);
|
|
||||||
auto io = insert.execute();
|
auto io = insert.execute();
|
||||||
printPipeline(io.pipeline.getProcessors(), buf);
|
printPipeline(io.pipeline.getProcessors(), buf);
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,6 @@
|
|||||||
#include <Interpreters/getTableExpressions.h>
|
#include <Interpreters/getTableExpressions.h>
|
||||||
#include <Interpreters/processColumnTransformers.h>
|
#include <Interpreters/processColumnTransformers.h>
|
||||||
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
||||||
#include <Interpreters/Context_fwd.h>
|
|
||||||
#include <Parsers/ASTFunction.h>
|
#include <Parsers/ASTFunction.h>
|
||||||
#include <Parsers/ASTInsertQuery.h>
|
#include <Parsers/ASTInsertQuery.h>
|
||||||
#include <Parsers/ASTSelectQuery.h>
|
#include <Parsers/ASTSelectQuery.h>
|
||||||
@ -27,7 +26,6 @@
|
|||||||
#include <Processors/Transforms/CountingTransform.h>
|
#include <Processors/Transforms/CountingTransform.h>
|
||||||
#include <Processors/Transforms/ExpressionTransform.h>
|
#include <Processors/Transforms/ExpressionTransform.h>
|
||||||
#include <Processors/Transforms/MaterializingTransform.h>
|
#include <Processors/Transforms/MaterializingTransform.h>
|
||||||
#include <Processors/Transforms/DeduplicationTokenTransforms.h>
|
|
||||||
#include <Processors/Transforms/SquashingTransform.h>
|
#include <Processors/Transforms/SquashingTransform.h>
|
||||||
#include <Processors/Transforms/PlanSquashingTransform.h>
|
#include <Processors/Transforms/PlanSquashingTransform.h>
|
||||||
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
|
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
|
||||||
@ -40,7 +38,6 @@
|
|||||||
#include <Common/ThreadStatus.h>
|
#include <Common/ThreadStatus.h>
|
||||||
#include <Common/checkStackSize.h>
|
#include <Common/checkStackSize.h>
|
||||||
#include <Common/ProfileEvents.h>
|
#include <Common/ProfileEvents.h>
|
||||||
#include "base/defines.h"
|
|
||||||
|
|
||||||
|
|
||||||
namespace ProfileEvents
|
namespace ProfileEvents
|
||||||
@ -397,358 +394,28 @@ Chain InterpreterInsertQuery::buildPreSinkChain(
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::vector<Chain>, std::vector<Chain>> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block)
|
|
||||||
{
|
|
||||||
chassert(presink_streams > 0);
|
|
||||||
chassert(sink_streams > 0);
|
|
||||||
|
|
||||||
ThreadGroupPtr running_group;
|
|
||||||
if (current_thread)
|
|
||||||
running_group = current_thread->getThreadGroup();
|
|
||||||
if (!running_group)
|
|
||||||
running_group = std::make_shared<ThreadGroup>(getContext());
|
|
||||||
|
|
||||||
std::vector<Chain> sink_chains;
|
|
||||||
std::vector<Chain> presink_chains;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < sink_streams; ++i)
|
|
||||||
{
|
|
||||||
auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr,
|
|
||||||
running_group, /* elapsed_counter_ms= */ nullptr);
|
|
||||||
|
|
||||||
sink_chains.emplace_back(std::move(out));
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < presink_streams; ++i)
|
|
||||||
{
|
|
||||||
auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block);
|
|
||||||
presink_chains.emplace_back(std::move(out));
|
|
||||||
}
|
|
||||||
|
|
||||||
return {std::move(presink_chains), std::move(sink_chains)};
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table)
|
|
||||||
{
|
|
||||||
const Settings & settings = getContext()->getSettingsRef();
|
|
||||||
|
|
||||||
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
|
||||||
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
|
||||||
|
|
||||||
bool is_trivial_insert_select = false;
|
|
||||||
|
|
||||||
if (settings.optimize_trivial_insert_select)
|
|
||||||
{
|
|
||||||
const auto & select_query = query.select->as<ASTSelectWithUnionQuery &>();
|
|
||||||
const auto & selects = select_query.list_of_selects->children;
|
|
||||||
const auto & union_modes = select_query.list_of_modes;
|
|
||||||
|
|
||||||
/// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries
|
|
||||||
const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; };
|
|
||||||
|
|
||||||
is_trivial_insert_select =
|
|
||||||
std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all))
|
|
||||||
&& std::all_of(selects.begin(), selects.end(), isTrivialSelect);
|
|
||||||
}
|
|
||||||
|
|
||||||
ContextPtr select_context = getContext();
|
|
||||||
|
|
||||||
if (is_trivial_insert_select)
|
|
||||||
{
|
|
||||||
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
|
|
||||||
* don't need to process SELECT with more than max_insert_threads
|
|
||||||
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
|
|
||||||
* to avoid unnecessary squashing.
|
|
||||||
*/
|
|
||||||
|
|
||||||
Settings new_settings = select_context->getSettings();
|
|
||||||
|
|
||||||
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
|
|
||||||
|
|
||||||
if (table->prefersLargeBlocks())
|
|
||||||
{
|
|
||||||
if (settings.min_insert_block_size_rows)
|
|
||||||
new_settings.max_block_size = settings.min_insert_block_size_rows;
|
|
||||||
if (settings.min_insert_block_size_bytes)
|
|
||||||
new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto context_for_trivial_select = Context::createCopy(context);
|
|
||||||
context_for_trivial_select->setSettings(new_settings);
|
|
||||||
context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames());
|
|
||||||
|
|
||||||
select_context = context_for_trivial_select;
|
|
||||||
}
|
|
||||||
|
|
||||||
QueryPipelineBuilder pipeline;
|
|
||||||
|
|
||||||
{
|
|
||||||
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
|
|
||||||
|
|
||||||
if (settings.allow_experimental_analyzer)
|
|
||||||
{
|
|
||||||
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options);
|
|
||||||
pipeline = interpreter_select_analyzer.buildQueryPipeline();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options);
|
|
||||||
pipeline = interpreter_select.buildQueryPipeline();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pipeline.dropTotalsAndExtremes();
|
|
||||||
|
|
||||||
/// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values.
|
|
||||||
if (getContext()->getSettingsRef().insert_null_as_default)
|
|
||||||
{
|
|
||||||
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
|
|
||||||
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
|
|
||||||
const auto & output_columns = metadata_snapshot->getColumns();
|
|
||||||
|
|
||||||
if (input_columns.size() == query_columns.size())
|
|
||||||
{
|
|
||||||
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
|
|
||||||
{
|
|
||||||
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
|
|
||||||
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
|
|
||||||
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type)
|
|
||||||
&& !isNullableOrLowCardinalityNullable(query_columns[col_idx].type)
|
|
||||||
&& !isVariant(query_columns[col_idx].type)
|
|
||||||
&& !isDynamic(query_columns[col_idx].type)
|
|
||||||
&& output_columns.has(query_columns[col_idx].name))
|
|
||||||
{
|
|
||||||
query_sample_block.setColumn(
|
|
||||||
col_idx,
|
|
||||||
ColumnWithTypeAndName(
|
|
||||||
makeNullableOrLowCardinalityNullable(query_columns[col_idx].column),
|
|
||||||
makeNullableOrLowCardinalityNullable(query_columns[col_idx].type),
|
|
||||||
query_columns[col_idx].name));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
auto actions_dag = ActionsDAG::makeConvertingActions(
|
|
||||||
pipeline.getHeader().getColumnsWithTypeAndName(),
|
|
||||||
query_sample_block.getColumnsWithTypeAndName(),
|
|
||||||
ActionsDAG::MatchColumnsMode::Position);
|
|
||||||
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
|
|
||||||
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<ExpressionTransform>(in_header, actions);
|
|
||||||
});
|
|
||||||
|
|
||||||
/// We need to convert Sparse columns to full, because it's destination storage
|
|
||||||
/// may not support it or may have different settings for applying Sparse serialization.
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<MaterializingTransform>(in_header);
|
|
||||||
});
|
|
||||||
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
auto context_ptr = getContext();
|
|
||||||
auto counting = std::make_shared<CountingTransform>(in_header, nullptr, context_ptr->getQuota());
|
|
||||||
counting->setProcessListElement(context_ptr->getProcessListElement());
|
|
||||||
counting->setProgressCallback(context_ptr->getProgressCallback());
|
|
||||||
|
|
||||||
return counting;
|
|
||||||
});
|
|
||||||
|
|
||||||
size_t num_select_threads = pipeline.getNumThreads();
|
|
||||||
|
|
||||||
pipeline.resize(1);
|
|
||||||
|
|
||||||
if (shouldAddSquashingFroStorage(table))
|
|
||||||
{
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<PlanSquashingTransform>(
|
|
||||||
in_header,
|
|
||||||
table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size,
|
|
||||||
table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<DeduplicationToken::AddTokenInfoTransform>(in_header);
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!settings.insert_deduplication_token.value.empty())
|
|
||||||
{
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<DeduplicationToken::SetUserTokenTransform>(settings.insert_deduplication_token.value, in_header);
|
|
||||||
});
|
|
||||||
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<DeduplicationToken::SetSourceBlockNumberTransform>(in_header);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Number of streams works like this:
|
|
||||||
/// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever
|
|
||||||
/// InterpreterSelectQuery ends up with.
|
|
||||||
/// * Use `max_insert_threads` streams for various insert-preparation steps, e.g.
|
|
||||||
/// materializing and squashing (too slow to do in one thread). That's `presink_chains`.
|
|
||||||
/// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage.
|
|
||||||
/// Otherwise ResizeProcessor them down to 1 stream.
|
|
||||||
|
|
||||||
size_t presink_streams_size = std::max<size_t>(settings.max_insert_threads, pipeline.getNumStreams());
|
|
||||||
|
|
||||||
size_t sink_streams_size = table->supportsParallelInsert() ? std::max<size_t>(1, settings.max_insert_threads) : 1;
|
|
||||||
|
|
||||||
if (!settings.parallel_view_processing)
|
|
||||||
{
|
|
||||||
auto table_id = table->getStorageID();
|
|
||||||
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
|
|
||||||
|
|
||||||
if (table->isView() || !views.empty())
|
|
||||||
sink_streams_size = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto [presink_chains, sink_chains] = buildPreAndSinkChains(
|
|
||||||
presink_streams_size, sink_streams_size,
|
|
||||||
table, metadata_snapshot, query_sample_block);
|
|
||||||
|
|
||||||
pipeline.resize(presink_chains.size());
|
|
||||||
|
|
||||||
if (shouldAddSquashingFroStorage(table))
|
|
||||||
{
|
|
||||||
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<ApplySquashingTransform>(
|
|
||||||
in_header,
|
|
||||||
table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size,
|
|
||||||
table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto & chain : presink_chains)
|
|
||||||
pipeline.addResources(chain.detachResources());
|
|
||||||
pipeline.addChains(std::move(presink_chains));
|
|
||||||
|
|
||||||
pipeline.resize(sink_streams_size);
|
|
||||||
|
|
||||||
for (auto & chain : sink_chains)
|
|
||||||
pipeline.addResources(chain.detachResources());
|
|
||||||
pipeline.addChains(std::move(sink_chains));
|
|
||||||
|
|
||||||
if (!settings.parallel_view_processing)
|
|
||||||
{
|
|
||||||
/// Don't use more threads for INSERT than for SELECT to reduce memory consumption.
|
|
||||||
if (pipeline.getNumThreads() > num_select_threads)
|
|
||||||
pipeline.setMaxThreads(num_select_threads);
|
|
||||||
}
|
|
||||||
else if (pipeline.getNumThreads() < settings.max_threads)
|
|
||||||
{
|
|
||||||
/// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select,
|
|
||||||
/// however in case of parallel_view_processing and multiple views, views can still be processed in parallel.
|
|
||||||
///
|
|
||||||
/// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads.
|
|
||||||
pipeline.setMaxThreads(settings.max_threads);
|
|
||||||
}
|
|
||||||
|
|
||||||
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
|
|
||||||
{
|
|
||||||
return std::make_shared<EmptySink>(cur_header);
|
|
||||||
});
|
|
||||||
|
|
||||||
return QueryPipelineBuilder::getPipeline(std::move(pipeline));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table)
|
|
||||||
{
|
|
||||||
const Settings & settings = getContext()->getSettingsRef();
|
|
||||||
|
|
||||||
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
|
||||||
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
|
||||||
|
|
||||||
Chain chain;
|
|
||||||
|
|
||||||
{
|
|
||||||
auto [presink_chains, sink_chains] = buildPreAndSinkChains(
|
|
||||||
/* presink_streams */1, /* sink_streams */1,
|
|
||||||
table, metadata_snapshot, query_sample_block);
|
|
||||||
|
|
||||||
chain = std::move(presink_chains.front());
|
|
||||||
chain.appendChain(std::move(sink_chains.front()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!settings.insert_deduplication_token.value.empty())
|
|
||||||
{
|
|
||||||
chain.addSource(std::make_shared<DeduplicationToken::SetSourceBlockNumberTransform>(chain.getInputHeader()));
|
|
||||||
chain.addSource(std::make_shared<DeduplicationToken::SetUserTokenTransform>(settings.insert_deduplication_token.value, chain.getInputHeader()));
|
|
||||||
}
|
|
||||||
|
|
||||||
chain.addSource(std::make_shared<DeduplicationToken::AddTokenInfoTransform>(chain.getInputHeader()));
|
|
||||||
|
|
||||||
if (shouldAddSquashingFroStorage(table))
|
|
||||||
{
|
|
||||||
bool table_prefers_large_blocks = table->prefersLargeBlocks();
|
|
||||||
|
|
||||||
auto squashing = std::make_shared<ApplySquashingTransform>(
|
|
||||||
chain.getInputHeader(),
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
|
||||||
|
|
||||||
chain.addSource(std::move(squashing));
|
|
||||||
|
|
||||||
auto balancing = std::make_shared<PlanSquashingTransform>(
|
|
||||||
chain.getInputHeader(),
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
|
||||||
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
|
||||||
|
|
||||||
chain.addSource(std::move(balancing));
|
|
||||||
}
|
|
||||||
|
|
||||||
auto context_ptr = getContext();
|
|
||||||
auto counting = std::make_shared<CountingTransform>(chain.getInputHeader(), nullptr, context_ptr->getQuota());
|
|
||||||
counting->setProcessListElement(context_ptr->getProcessListElement());
|
|
||||||
counting->setProgressCallback(context_ptr->getProgressCallback());
|
|
||||||
chain.addSource(std::move(counting));
|
|
||||||
|
|
||||||
QueryPipeline pipeline = QueryPipeline(std::move(chain));
|
|
||||||
|
|
||||||
pipeline.setNumThreads(std::min<size_t>(pipeline.getNumThreads(), settings.max_threads));
|
|
||||||
pipeline.setConcurrencyControl(settings.use_concurrency_control);
|
|
||||||
|
|
||||||
if (query.hasInlinedData() && !async_insert)
|
|
||||||
{
|
|
||||||
/// can execute without additional data
|
|
||||||
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
|
|
||||||
for (auto && buffer : owned_buffers)
|
|
||||||
format->addBuffer(std::move(buffer));
|
|
||||||
|
|
||||||
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
|
|
||||||
pipeline.complete(std::move(pipe));
|
|
||||||
}
|
|
||||||
|
|
||||||
return pipeline;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
BlockIO InterpreterInsertQuery::execute()
|
BlockIO InterpreterInsertQuery::execute()
|
||||||
{
|
{
|
||||||
const Settings & settings = getContext()->getSettingsRef();
|
const Settings & settings = getContext()->getSettingsRef();
|
||||||
auto & query = query_ptr->as<ASTInsertQuery &>();
|
auto & query = query_ptr->as<ASTInsertQuery &>();
|
||||||
|
|
||||||
|
QueryPipelineBuilder pipeline;
|
||||||
|
std::optional<QueryPipeline> distributed_pipeline;
|
||||||
|
QueryPlanResourceHolder resources;
|
||||||
|
|
||||||
StoragePtr table = getTable(query);
|
StoragePtr table = getTable(query);
|
||||||
checkStorageSupportsTransactionsIfNeeded(table, getContext());
|
checkStorageSupportsTransactionsIfNeeded(table, getContext());
|
||||||
|
|
||||||
|
StoragePtr inner_table;
|
||||||
|
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
|
||||||
|
inner_table = mv->getTargetTable();
|
||||||
|
|
||||||
if (query.partition_by && !table->supportsPartitionBy())
|
if (query.partition_by && !table->supportsPartitionBy())
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage");
|
||||||
|
|
||||||
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
|
auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout);
|
||||||
|
|
||||||
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
auto metadata_snapshot = table->getInMemoryMetadataPtr();
|
||||||
|
|
||||||
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized);
|
||||||
|
|
||||||
/// For table functions we check access while executing
|
/// For table functions we check access while executing
|
||||||
@ -756,43 +423,320 @@ BlockIO InterpreterInsertQuery::execute()
|
|||||||
if (!query.table_function)
|
if (!query.table_function)
|
||||||
getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
|
getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames());
|
||||||
|
|
||||||
if (!allow_materialized)
|
if (query.select && settings.parallel_distributed_insert_select)
|
||||||
|
// Distributed INSERT SELECT
|
||||||
|
distributed_pipeline = table->distributedWrite(query, getContext());
|
||||||
|
|
||||||
|
std::vector<Chain> presink_chains;
|
||||||
|
std::vector<Chain> sink_chains;
|
||||||
|
if (!distributed_pipeline)
|
||||||
{
|
{
|
||||||
for (const auto & column : metadata_snapshot->getColumns())
|
/// Number of streams works like this:
|
||||||
if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name))
|
/// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever
|
||||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name);
|
/// InterpreterSelectQuery ends up with.
|
||||||
|
/// * Use `max_insert_threads` streams for various insert-preparation steps, e.g.
|
||||||
|
/// materializing and squashing (too slow to do in one thread). That's `presink_chains`.
|
||||||
|
/// * If the table supports parallel inserts, use the same streams for writing to IStorage.
|
||||||
|
/// Otherwise ResizeProcessor them down to 1 stream.
|
||||||
|
/// * If it's not an INSERT SELECT, forget all that and use one stream.
|
||||||
|
size_t pre_streams_size = 1;
|
||||||
|
size_t sink_streams_size = 1;
|
||||||
|
|
||||||
|
if (query.select)
|
||||||
|
{
|
||||||
|
bool is_trivial_insert_select = false;
|
||||||
|
|
||||||
|
if (settings.optimize_trivial_insert_select)
|
||||||
|
{
|
||||||
|
const auto & select_query = query.select->as<ASTSelectWithUnionQuery &>();
|
||||||
|
const auto & selects = select_query.list_of_selects->children;
|
||||||
|
const auto & union_modes = select_query.list_of_modes;
|
||||||
|
|
||||||
|
/// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries
|
||||||
|
const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; };
|
||||||
|
|
||||||
|
is_trivial_insert_select =
|
||||||
|
std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all))
|
||||||
|
&& std::all_of(selects.begin(), selects.end(), isTrivialSelect);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_trivial_insert_select)
|
||||||
|
{
|
||||||
|
/** When doing trivial INSERT INTO ... SELECT ... FROM table,
|
||||||
|
* don't need to process SELECT with more than max_insert_threads
|
||||||
|
* and it's reasonable to set block size for SELECT to the desired block size for INSERT
|
||||||
|
* to avoid unnecessary squashing.
|
||||||
|
*/
|
||||||
|
|
||||||
|
Settings new_settings = getContext()->getSettings();
|
||||||
|
|
||||||
|
new_settings.max_threads = std::max<UInt64>(1, settings.max_insert_threads);
|
||||||
|
|
||||||
|
if (table->prefersLargeBlocks())
|
||||||
|
{
|
||||||
|
if (settings.min_insert_block_size_rows)
|
||||||
|
new_settings.max_block_size = settings.min_insert_block_size_rows;
|
||||||
|
if (settings.min_insert_block_size_bytes)
|
||||||
|
new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto new_context = Context::createCopy(context);
|
||||||
|
new_context->setSettings(new_settings);
|
||||||
|
new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames());
|
||||||
|
|
||||||
|
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
|
||||||
|
|
||||||
|
if (settings.allow_experimental_analyzer)
|
||||||
|
{
|
||||||
|
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options);
|
||||||
|
pipeline = interpreter_select_analyzer.buildQueryPipeline();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options);
|
||||||
|
pipeline = interpreter_select.buildQueryPipeline();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/// Passing 1 as subquery_depth will disable limiting size of intermediate result.
|
||||||
|
auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1);
|
||||||
|
|
||||||
|
if (settings.allow_experimental_analyzer)
|
||||||
|
{
|
||||||
|
InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options);
|
||||||
|
pipeline = interpreter_select_analyzer.buildQueryPipeline();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options);
|
||||||
|
pipeline = interpreter_select.buildQueryPipeline();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline.dropTotalsAndExtremes();
|
||||||
|
|
||||||
|
if (settings.max_insert_threads > 1)
|
||||||
|
{
|
||||||
|
auto table_id = table->getStorageID();
|
||||||
|
auto views = DatabaseCatalog::instance().getDependentViews(table_id);
|
||||||
|
|
||||||
|
/// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them.
|
||||||
|
/// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts.
|
||||||
|
const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert();
|
||||||
|
pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads
|
||||||
|
: std::min<size_t>(settings.max_insert_threads, pipeline.getNumStreams());
|
||||||
|
|
||||||
|
/// Deduplication when passing insert_deduplication_token breaks if using more than one thread
|
||||||
|
if (!settings.insert_deduplication_token.toString().empty())
|
||||||
|
{
|
||||||
|
LOG_DEBUG(
|
||||||
|
getLogger("InsertQuery"),
|
||||||
|
"Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues");
|
||||||
|
pre_streams_size = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (table->supportsParallelInsert())
|
||||||
|
sink_streams_size = pre_streams_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline.resize(pre_streams_size);
|
||||||
|
|
||||||
|
/// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values.
|
||||||
|
if (getContext()->getSettingsRef().insert_null_as_default)
|
||||||
|
{
|
||||||
|
const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName();
|
||||||
|
const auto & query_columns = query_sample_block.getColumnsWithTypeAndName();
|
||||||
|
const auto & output_columns = metadata_snapshot->getColumns();
|
||||||
|
|
||||||
|
if (input_columns.size() == query_columns.size())
|
||||||
|
{
|
||||||
|
for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx)
|
||||||
|
{
|
||||||
|
/// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with
|
||||||
|
/// default column values (in AddingDefaultsTransform), so all values will be cast correctly.
|
||||||
|
if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type)
|
||||||
|
&& !isNullableOrLowCardinalityNullable(query_columns[col_idx].type)
|
||||||
|
&& !isVariant(query_columns[col_idx].type)
|
||||||
|
&& !isDynamic(query_columns[col_idx].type)
|
||||||
|
&& output_columns.has(query_columns[col_idx].name))
|
||||||
|
query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ThreadGroupPtr running_group;
|
||||||
|
if (current_thread)
|
||||||
|
running_group = current_thread->getThreadGroup();
|
||||||
|
if (!running_group)
|
||||||
|
running_group = std::make_shared<ThreadGroup>(getContext());
|
||||||
|
for (size_t i = 0; i < sink_streams_size; ++i)
|
||||||
|
{
|
||||||
|
auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr,
|
||||||
|
running_group, /* elapsed_counter_ms= */ nullptr);
|
||||||
|
sink_chains.emplace_back(std::move(out));
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < pre_streams_size; ++i)
|
||||||
|
{
|
||||||
|
auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block);
|
||||||
|
presink_chains.emplace_back(std::move(out));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockIO res;
|
BlockIO res;
|
||||||
|
|
||||||
if (query.select)
|
/// What type of query: INSERT or INSERT SELECT or INSERT WATCH?
|
||||||
|
if (distributed_pipeline)
|
||||||
{
|
{
|
||||||
if (settings.parallel_distributed_insert_select)
|
res.pipeline = std::move(*distributed_pipeline);
|
||||||
|
}
|
||||||
|
else if (query.select)
|
||||||
|
{
|
||||||
|
const auto & header = presink_chains.at(0).getInputHeader();
|
||||||
|
auto actions_dag = ActionsDAG::makeConvertingActions(
|
||||||
|
pipeline.getHeader().getColumnsWithTypeAndName(),
|
||||||
|
header.getColumnsWithTypeAndName(),
|
||||||
|
ActionsDAG::MatchColumnsMode::Position);
|
||||||
|
auto actions = std::make_shared<ExpressionActions>(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes));
|
||||||
|
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
{
|
{
|
||||||
auto distributed = table->distributedWrite(query, getContext());
|
return std::make_shared<ExpressionTransform>(in_header, actions);
|
||||||
if (distributed)
|
});
|
||||||
{
|
|
||||||
res.pipeline = std::move(*distributed);
|
/// We need to convert Sparse columns to full, because it's destination storage
|
||||||
}
|
/// may not support it or may have different settings for applying Sparse serialization.
|
||||||
else
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
{
|
|
||||||
res.pipeline = buildInsertSelectPipeline(query, table);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
res.pipeline = buildInsertSelectPipeline(query, table);
|
return std::make_shared<MaterializingTransform>(in_header);
|
||||||
|
});
|
||||||
|
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
auto context_ptr = getContext();
|
||||||
|
auto counting = std::make_shared<CountingTransform>(in_header, nullptr, context_ptr->getQuota());
|
||||||
|
counting->setProcessListElement(context_ptr->getProcessListElement());
|
||||||
|
counting->setProgressCallback(context_ptr->getProgressCallback());
|
||||||
|
|
||||||
|
return counting;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (shouldAddSquashingFroStorage(table))
|
||||||
|
{
|
||||||
|
bool table_prefers_large_blocks = table->prefersLargeBlocks();
|
||||||
|
|
||||||
|
size_t threads = presink_chains.size();
|
||||||
|
|
||||||
|
pipeline.resize(1);
|
||||||
|
|
||||||
|
pipeline.addTransform(std::make_shared<PlanSquashingTransform>(
|
||||||
|
header,
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL));
|
||||||
|
|
||||||
|
pipeline.resize(threads);
|
||||||
|
|
||||||
|
pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<ApplySquashingTransform>(
|
||||||
|
in_header,
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t num_select_threads = pipeline.getNumThreads();
|
||||||
|
|
||||||
|
for (auto & chain : presink_chains)
|
||||||
|
resources = chain.detachResources();
|
||||||
|
for (auto & chain : sink_chains)
|
||||||
|
resources = chain.detachResources();
|
||||||
|
|
||||||
|
pipeline.addChains(std::move(presink_chains));
|
||||||
|
pipeline.resize(sink_chains.size());
|
||||||
|
pipeline.addChains(std::move(sink_chains));
|
||||||
|
|
||||||
|
if (!settings.parallel_view_processing)
|
||||||
|
{
|
||||||
|
/// Don't use more threads for INSERT than for SELECT to reduce memory consumption.
|
||||||
|
if (pipeline.getNumThreads() > num_select_threads)
|
||||||
|
pipeline.setMaxThreads(num_select_threads);
|
||||||
|
}
|
||||||
|
else if (pipeline.getNumThreads() < settings.max_threads)
|
||||||
|
{
|
||||||
|
/// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select,
|
||||||
|
/// however in case of parallel_view_processing and multiple views, views can still be processed in parallel.
|
||||||
|
///
|
||||||
|
/// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads.
|
||||||
|
pipeline.setMaxThreads(settings.max_threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr
|
||||||
|
{
|
||||||
|
return std::make_shared<EmptySink>(cur_header);
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!allow_materialized)
|
||||||
|
{
|
||||||
|
for (const auto & column : metadata_snapshot->getColumns())
|
||||||
|
if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name))
|
||||||
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
res.pipeline = buildInsertPipeline(query, table);
|
auto & chain = presink_chains.at(0);
|
||||||
|
chain.appendChain(std::move(sink_chains.at(0)));
|
||||||
|
|
||||||
|
if (shouldAddSquashingFroStorage(table))
|
||||||
|
{
|
||||||
|
bool table_prefers_large_blocks = table->prefersLargeBlocks();
|
||||||
|
|
||||||
|
auto squashing = std::make_shared<ApplySquashingTransform>(
|
||||||
|
chain.getInputHeader(),
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||||
|
|
||||||
|
chain.addSource(std::move(squashing));
|
||||||
|
|
||||||
|
auto balancing = std::make_shared<PlanSquashingTransform>(
|
||||||
|
chain.getInputHeader(),
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size,
|
||||||
|
table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL);
|
||||||
|
|
||||||
|
chain.addSource(std::move(balancing));
|
||||||
|
}
|
||||||
|
|
||||||
|
auto context_ptr = getContext();
|
||||||
|
auto counting = std::make_shared<CountingTransform>(chain.getInputHeader(), nullptr, context_ptr->getQuota());
|
||||||
|
counting->setProcessListElement(context_ptr->getProcessListElement());
|
||||||
|
counting->setProgressCallback(context_ptr->getProgressCallback());
|
||||||
|
chain.addSource(std::move(counting));
|
||||||
|
|
||||||
|
res.pipeline = QueryPipeline(std::move(presink_chains[0]));
|
||||||
|
res.pipeline.setNumThreads(std::min<size_t>(res.pipeline.getNumThreads(), settings.max_threads));
|
||||||
|
res.pipeline.setConcurrencyControl(settings.use_concurrency_control);
|
||||||
|
|
||||||
|
if (query.hasInlinedData() && !async_insert)
|
||||||
|
{
|
||||||
|
/// can execute without additional data
|
||||||
|
auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr);
|
||||||
|
for (auto && buffer : owned_buffers)
|
||||||
|
format->addBuffer(std::move(buffer));
|
||||||
|
|
||||||
|
auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr);
|
||||||
|
res.pipeline.complete(std::move(pipe));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
res.pipeline.addStorageHolder(table);
|
res.pipeline.addResources(std::move(resources));
|
||||||
|
|
||||||
if (const auto * mv = dynamic_cast<const StorageMaterializedView *>(table.get()))
|
res.pipeline.addStorageHolder(table);
|
||||||
res.pipeline.addStorageHolder(mv->getTargetTable());
|
if (inner_table)
|
||||||
|
res.pipeline.addStorageHolder(inner_table);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -813,27 +757,17 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const
|
void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const
|
||||||
{
|
{
|
||||||
extendQueryLogElemImpl(elem, context_);
|
extendQueryLogElemImpl(elem, context_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void registerInterpreterInsertQuery(InterpreterFactory & factory)
|
void registerInterpreterInsertQuery(InterpreterFactory & factory)
|
||||||
{
|
{
|
||||||
auto create_fn = [] (const InterpreterFactory::Arguments & args)
|
auto create_fn = [] (const InterpreterFactory::Arguments & args)
|
||||||
{
|
{
|
||||||
return std::make_unique<InterpreterInsertQuery>(
|
return std::make_unique<InterpreterInsertQuery>(args.query, args.context, args.allow_materialized);
|
||||||
args.query,
|
|
||||||
args.context,
|
|
||||||
args.allow_materialized,
|
|
||||||
/* no_squash */false,
|
|
||||||
/* no_destination */false,
|
|
||||||
/* async_insert */false);
|
|
||||||
};
|
};
|
||||||
factory.registerInterpreter("InterpreterInsertQuery", create_fn);
|
factory.registerInterpreter("InterpreterInsertQuery", create_fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -23,10 +23,10 @@ public:
|
|||||||
InterpreterInsertQuery(
|
InterpreterInsertQuery(
|
||||||
const ASTPtr & query_ptr_,
|
const ASTPtr & query_ptr_,
|
||||||
ContextPtr context_,
|
ContextPtr context_,
|
||||||
bool allow_materialized_,
|
bool allow_materialized_ = false,
|
||||||
bool no_squash_,
|
bool no_squash_ = false,
|
||||||
bool no_destination,
|
bool no_destination_ = false,
|
||||||
bool async_insert_);
|
bool async_insert_ = false);
|
||||||
|
|
||||||
/** Prepare a request for execution. Return block streams
|
/** Prepare a request for execution. Return block streams
|
||||||
* - the stream into which you can write data to execute the query, if INSERT;
|
* - the stream into which you can write data to execute the query, if INSERT;
|
||||||
@ -73,17 +73,12 @@ private:
|
|||||||
|
|
||||||
ASTPtr query_ptr;
|
ASTPtr query_ptr;
|
||||||
const bool allow_materialized;
|
const bool allow_materialized;
|
||||||
bool no_squash = false;
|
const bool no_squash;
|
||||||
bool no_destination = false;
|
const bool no_destination;
|
||||||
const bool async_insert;
|
const bool async_insert;
|
||||||
|
|
||||||
std::vector<std::unique_ptr<ReadBuffer>> owned_buffers;
|
std::vector<std::unique_ptr<ReadBuffer>> owned_buffers;
|
||||||
|
|
||||||
std::pair<std::vector<Chain>, std::vector<Chain>> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block);
|
|
||||||
|
|
||||||
QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table);
|
|
||||||
QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table);
|
|
||||||
|
|
||||||
Chain buildSink(
|
Chain buildSink(
|
||||||
const StoragePtr & table,
|
const StoragePtr & table,
|
||||||
const StorageMetadataPtr & metadata_snapshot,
|
const StorageMetadataPtr & metadata_snapshot,
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <Interpreters/Squashing.h>
|
#include <Interpreters/Squashing.h>
|
||||||
#include <Common/CurrentThread.h>
|
#include <Common/CurrentThread.h>
|
||||||
#include <base/defines.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -12,33 +11,24 @@ namespace ErrorCodes
|
|||||||
}
|
}
|
||||||
|
|
||||||
Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_)
|
Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_)
|
||||||
: min_block_size_rows(min_block_size_rows_)
|
: header(header_)
|
||||||
|
, min_block_size_rows(min_block_size_rows_)
|
||||||
, min_block_size_bytes(min_block_size_bytes_)
|
, min_block_size_bytes(min_block_size_bytes_)
|
||||||
, header(header_)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
Chunk Squashing::flush()
|
Chunk Squashing::flush()
|
||||||
{
|
{
|
||||||
if (!accumulated)
|
return convertToChunk(std::move(chunks_to_merge_vec));
|
||||||
return {};
|
|
||||||
|
|
||||||
auto result = convertToChunk(accumulated.extract());
|
|
||||||
chassert(result);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Chunk Squashing::squash(Chunk && input_chunk)
|
Chunk Squashing::squash(Chunk && input_chunk)
|
||||||
{
|
{
|
||||||
if (!input_chunk)
|
if (!input_chunk.hasChunkInfo())
|
||||||
return Chunk();
|
return Chunk();
|
||||||
|
|
||||||
auto squash_info = input_chunk.getChunkInfos().extract<ChunksToSquash>();
|
const auto *info = getInfoFromChunk(input_chunk);
|
||||||
|
return squash(info->chunks);
|
||||||
if (!squash_info)
|
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
|
|
||||||
|
|
||||||
return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Chunk Squashing::add(Chunk && input_chunk)
|
Chunk Squashing::add(Chunk && input_chunk)
|
||||||
@ -47,37 +37,48 @@ Chunk Squashing::add(Chunk && input_chunk)
|
|||||||
return {};
|
return {};
|
||||||
|
|
||||||
/// Just read block is already enough.
|
/// Just read block is already enough.
|
||||||
if (isEnoughSize(input_chunk))
|
if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes()))
|
||||||
{
|
{
|
||||||
/// If no accumulated data, return just read block.
|
/// If no accumulated data, return just read block.
|
||||||
if (!accumulated)
|
if (chunks_to_merge_vec.empty())
|
||||||
{
|
{
|
||||||
accumulated.add(std::move(input_chunk));
|
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
||||||
return convertToChunk(accumulated.extract());
|
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
||||||
|
chunks_to_merge_vec.clear();
|
||||||
|
return res_chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return accumulated data (maybe it has small size) and place new block to accumulated data.
|
/// Return accumulated data (maybe it has small size) and place new block to accumulated data.
|
||||||
Chunk res_chunk = convertToChunk(accumulated.extract());
|
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
||||||
accumulated.add(std::move(input_chunk));
|
chunks_to_merge_vec.clear();
|
||||||
|
changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
|
||||||
|
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
||||||
return res_chunk;
|
return res_chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Accumulated block is already enough.
|
/// Accumulated block is already enough.
|
||||||
if (isEnoughSize())
|
if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes))
|
||||||
{
|
{
|
||||||
/// Return accumulated data and place new block to accumulated data.
|
/// Return accumulated data and place new block to accumulated data.
|
||||||
Chunk res_chunk = convertToChunk(accumulated.extract());
|
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
||||||
accumulated.add(std::move(input_chunk));
|
chunks_to_merge_vec.clear();
|
||||||
|
changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
|
||||||
|
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
||||||
return res_chunk;
|
return res_chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Pushing data into accumulating vector
|
/// Pushing data into accumulating vector
|
||||||
accumulated.add(std::move(input_chunk));
|
expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes());
|
||||||
|
chunks_to_merge_vec.push_back(std::move(input_chunk));
|
||||||
|
|
||||||
/// If accumulated data is big enough, we send it
|
/// If accumulated data is big enough, we send it
|
||||||
if (isEnoughSize())
|
if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes))
|
||||||
return convertToChunk(accumulated.extract());
|
{
|
||||||
|
Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec));
|
||||||
|
changeCurrentSize(0, 0);
|
||||||
|
chunks_to_merge_vec.clear();
|
||||||
|
return res_chunk;
|
||||||
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,15 +90,14 @@ Chunk Squashing::convertToChunk(std::vector<Chunk> && chunks) const
|
|||||||
auto info = std::make_shared<ChunksToSquash>();
|
auto info = std::make_shared<ChunksToSquash>();
|
||||||
info->chunks = std::move(chunks);
|
info->chunks = std::move(chunks);
|
||||||
|
|
||||||
// It is imortant that chunk is not empty, it has to have columns even if they are empty
|
chunks.clear();
|
||||||
auto aggr_chunk = Chunk(header.getColumns(), 0);
|
|
||||||
aggr_chunk.getChunkInfos().add(std::move(info));
|
return Chunk(header.cloneEmptyColumns(), 0, info);
|
||||||
chassert(aggr_chunk);
|
|
||||||
return aggr_chunk;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoCollection && infos)
|
Chunk Squashing::squash(std::vector<Chunk> & input_chunks)
|
||||||
{
|
{
|
||||||
|
Chunk accumulated_chunk;
|
||||||
std::vector<IColumn::MutablePtr> mutable_columns = {};
|
std::vector<IColumn::MutablePtr> mutable_columns = {};
|
||||||
size_t rows = 0;
|
size_t rows = 0;
|
||||||
for (const Chunk & chunk : input_chunks)
|
for (const Chunk & chunk : input_chunks)
|
||||||
@ -119,17 +119,35 @@ Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoColl
|
|||||||
for (size_t j = 0, size = mutable_columns.size(); j < size; ++j)
|
for (size_t j = 0, size = mutable_columns.size(); j < size; ++j)
|
||||||
{
|
{
|
||||||
const auto source_column = columns[j];
|
const auto source_column = columns[j];
|
||||||
|
|
||||||
mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size());
|
mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
accumulated_chunk.setColumns(std::move(mutable_columns), rows);
|
||||||
|
return accumulated_chunk;
|
||||||
|
}
|
||||||
|
|
||||||
Chunk result;
|
const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk)
|
||||||
result.setColumns(std::move(mutable_columns), rows);
|
{
|
||||||
result.setChunkInfos(infos);
|
const auto& info = chunk.getChunkInfo();
|
||||||
result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos()));
|
const auto * agg_info = typeid_cast<const ChunksToSquash *>(info.get());
|
||||||
|
|
||||||
chassert(result);
|
if (!agg_info)
|
||||||
return result;
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
|
||||||
|
|
||||||
|
return agg_info;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Squashing::expandCurrentSize(size_t rows, size_t bytes)
|
||||||
|
{
|
||||||
|
accumulated_size.rows += rows;
|
||||||
|
accumulated_size.bytes += bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Squashing::changeCurrentSize(size_t rows, size_t bytes)
|
||||||
|
{
|
||||||
|
accumulated_size.rows = rows;
|
||||||
|
accumulated_size.bytes = bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
|
bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
|
||||||
@ -138,28 +156,4 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const
|
|||||||
|| (min_block_size_rows && rows >= min_block_size_rows)
|
|| (min_block_size_rows && rows >= min_block_size_rows)
|
||||||
|| (min_block_size_bytes && bytes >= min_block_size_bytes);
|
|| (min_block_size_bytes && bytes >= min_block_size_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Squashing::isEnoughSize() const
|
|
||||||
{
|
|
||||||
return isEnoughSize(accumulated.getRows(), accumulated.getBytes());
|
|
||||||
};
|
|
||||||
|
|
||||||
bool Squashing::isEnoughSize(const Chunk & chunk) const
|
|
||||||
{
|
|
||||||
return isEnoughSize(chunk.getNumRows(), chunk.bytes());
|
|
||||||
}
|
|
||||||
|
|
||||||
void Squashing::CurrentSize::add(Chunk && chunk)
|
|
||||||
{
|
|
||||||
rows += chunk.getNumRows();
|
|
||||||
bytes += chunk.bytes();
|
|
||||||
chunks.push_back(std::move(chunk));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<Chunk> Squashing::CurrentSize::extract()
|
|
||||||
{
|
|
||||||
auto result = std::move(chunks);
|
|
||||||
*this = {};
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -8,18 +8,9 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
class ChunksToSquash : public ChunkInfoCloneable<ChunksToSquash>
|
struct ChunksToSquash : public ChunkInfo
|
||||||
{
|
{
|
||||||
public:
|
mutable std::vector<Chunk> chunks = {};
|
||||||
ChunksToSquash() = default;
|
|
||||||
ChunksToSquash(const ChunksToSquash & other)
|
|
||||||
{
|
|
||||||
chunks.reserve(other.chunks.size());
|
|
||||||
for (const auto & chunk: other.chunks)
|
|
||||||
chunks.push_back(chunk.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<Chunk> chunks = {};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Merging consecutive passed blocks to specified minimum size.
|
/** Merging consecutive passed blocks to specified minimum size.
|
||||||
@ -45,35 +36,32 @@ public:
|
|||||||
static Chunk squash(Chunk && input_chunk);
|
static Chunk squash(Chunk && input_chunk);
|
||||||
Chunk flush();
|
Chunk flush();
|
||||||
|
|
||||||
void setHeader(Block header_) { header = std::move(header_); }
|
bool isDataLeft()
|
||||||
const Block & getHeader() const { return header; }
|
{
|
||||||
|
return !chunks_to_merge_vec.empty();
|
||||||
private:
|
}
|
||||||
class CurrentSize
|
|
||||||
|
Block header;
|
||||||
|
private:
|
||||||
|
struct CurrentSize
|
||||||
{
|
{
|
||||||
std::vector<Chunk> chunks = {};
|
|
||||||
size_t rows = 0;
|
size_t rows = 0;
|
||||||
size_t bytes = 0;
|
size_t bytes = 0;
|
||||||
|
|
||||||
public:
|
|
||||||
explicit operator bool () const { return !chunks.empty(); }
|
|
||||||
size_t getRows() const { return rows; }
|
|
||||||
size_t getBytes() const { return bytes; }
|
|
||||||
void add(Chunk && chunk);
|
|
||||||
std::vector<Chunk> extract();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const size_t min_block_size_rows;
|
std::vector<Chunk> chunks_to_merge_vec = {};
|
||||||
const size_t min_block_size_bytes;
|
size_t min_block_size_rows;
|
||||||
Block header;
|
size_t min_block_size_bytes;
|
||||||
|
|
||||||
CurrentSize accumulated;
|
CurrentSize accumulated_size;
|
||||||
|
|
||||||
static Chunk squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoCollection && infos);
|
static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk);
|
||||||
|
|
||||||
bool isEnoughSize() const;
|
static Chunk squash(std::vector<Chunk> & input_chunks);
|
||||||
|
|
||||||
|
void expandCurrentSize(size_t rows, size_t bytes);
|
||||||
|
void changeCurrentSize(size_t rows, size_t bytes);
|
||||||
bool isEnoughSize(size_t rows, size_t bytes) const;
|
bool isEnoughSize(size_t rows, size_t bytes) const;
|
||||||
bool isEnoughSize(const Chunk & chunk) const;
|
|
||||||
|
|
||||||
Chunk convertToChunk(std::vector<Chunk> && chunks) const;
|
Chunk convertToChunk(std::vector<Chunk> && chunks) const;
|
||||||
};
|
};
|
||||||
|
@ -538,13 +538,7 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
|
|||||||
insert_context->makeQueryContext();
|
insert_context->makeQueryContext();
|
||||||
addSettingsForQuery(insert_context, IAST::QueryKind::Insert);
|
addSettingsForQuery(insert_context, IAST::QueryKind::Insert);
|
||||||
|
|
||||||
InterpreterInsertQuery interpreter(
|
InterpreterInsertQuery interpreter(query_ptr, insert_context);
|
||||||
query_ptr,
|
|
||||||
insert_context,
|
|
||||||
/* allow_materialized */ false,
|
|
||||||
/* no_squash */ false,
|
|
||||||
/* no_destination */ false,
|
|
||||||
/* async_isnert */ false);
|
|
||||||
BlockIO io = interpreter.execute();
|
BlockIO io = interpreter.execute();
|
||||||
|
|
||||||
PushingPipelineExecutor executor(io.pipeline);
|
PushingPipelineExecutor executor(io.pipeline);
|
||||||
|
@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check for dynamic subcolumns in unknown required columns.
|
/// Check for dynamic subcolums in unknown required columns.
|
||||||
if (!unknown_required_source_columns.empty())
|
if (!unknown_required_source_columns.empty())
|
||||||
{
|
{
|
||||||
for (const NameAndTypePair & pair : source_columns_ordinary)
|
for (const NameAndTypePair & pair : source_columns_ordinary)
|
||||||
|
@ -1129,11 +1129,11 @@ inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node,
|
|||||||
|
|
||||||
if (hex)
|
if (hex)
|
||||||
{
|
{
|
||||||
hexStringDecode(str_begin, str_end, res_pos);
|
hexStringDecode(str_begin, str_end, res_pos, word_size);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
binStringDecode(str_begin, str_end, res_pos);
|
binStringDecode(str_begin, str_end, res_pos, word_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
return makeStringLiteral(pos, node, String(reinterpret_cast<char *>(res.data()), (res_pos - res_begin - 1)));
|
return makeStringLiteral(pos, node, String(reinterpret_cast<char *>(res.data()), (res_pos - res_begin - 1)));
|
||||||
|
@ -19,6 +19,14 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns
|
|||||||
checkNumRowsIsConsistent();
|
checkNumRowsIsConsistent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
|
||||||
|
: columns(std::move(columns_))
|
||||||
|
, num_rows(num_rows_)
|
||||||
|
, chunk_info(std::move(chunk_info_))
|
||||||
|
{
|
||||||
|
checkNumRowsIsConsistent();
|
||||||
|
}
|
||||||
|
|
||||||
static Columns unmuteColumns(MutableColumns && mutable_columns)
|
static Columns unmuteColumns(MutableColumns && mutable_columns)
|
||||||
{
|
{
|
||||||
Columns columns;
|
Columns columns;
|
||||||
@ -35,11 +43,17 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_)
|
|||||||
checkNumRowsIsConsistent();
|
checkNumRowsIsConsistent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_)
|
||||||
|
: columns(unmuteColumns(std::move(columns_)))
|
||||||
|
, num_rows(num_rows_)
|
||||||
|
, chunk_info(std::move(chunk_info_))
|
||||||
|
{
|
||||||
|
checkNumRowsIsConsistent();
|
||||||
|
}
|
||||||
|
|
||||||
Chunk Chunk::clone() const
|
Chunk Chunk::clone() const
|
||||||
{
|
{
|
||||||
auto tmp = Chunk(getColumns(), getNumRows());
|
return Chunk(getColumns(), getNumRows(), chunk_info);
|
||||||
tmp.setChunkInfos(chunk_infos.clone());
|
|
||||||
return tmp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Chunk::setColumns(Columns columns_, UInt64 num_rows_)
|
void Chunk::setColumns(Columns columns_, UInt64 num_rows_)
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Common/CollectionOfDerived.h>
|
|
||||||
#include <Columns/IColumn.h>
|
#include <Columns/IColumn.h>
|
||||||
|
#include <unordered_map>
|
||||||
#include <memory>
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -11,29 +9,11 @@ namespace DB
|
|||||||
class ChunkInfo
|
class ChunkInfo
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using Ptr = std::shared_ptr<ChunkInfo>;
|
|
||||||
|
|
||||||
ChunkInfo() = default;
|
|
||||||
ChunkInfo(const ChunkInfo&) = default;
|
|
||||||
ChunkInfo(ChunkInfo&&) = default;
|
|
||||||
|
|
||||||
virtual Ptr clone() const = 0;
|
|
||||||
virtual ~ChunkInfo() = default;
|
virtual ~ChunkInfo() = default;
|
||||||
|
ChunkInfo() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
using ChunkInfoPtr = std::shared_ptr<const ChunkInfo>;
|
||||||
template<class Derived>
|
|
||||||
class ChunkInfoCloneable : public ChunkInfo
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
ChunkInfoCloneable() = default;
|
|
||||||
ChunkInfoCloneable(const ChunkInfoCloneable & other) = default;
|
|
||||||
|
|
||||||
Ptr clone() const override
|
|
||||||
{
|
|
||||||
return std::static_pointer_cast<ChunkInfo>(std::make_shared<Derived>(*static_cast<const Derived*>(this)));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Chunk is a list of columns with the same length.
|
* Chunk is a list of columns with the same length.
|
||||||
@ -52,26 +32,26 @@ public:
|
|||||||
class Chunk
|
class Chunk
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using ChunkInfoCollection = CollectionOfDerivedItems<ChunkInfo>;
|
|
||||||
|
|
||||||
Chunk() = default;
|
Chunk() = default;
|
||||||
Chunk(const Chunk & other) = delete;
|
Chunk(const Chunk & other) = delete;
|
||||||
Chunk(Chunk && other) noexcept
|
Chunk(Chunk && other) noexcept
|
||||||
: columns(std::move(other.columns))
|
: columns(std::move(other.columns))
|
||||||
, num_rows(other.num_rows)
|
, num_rows(other.num_rows)
|
||||||
, chunk_infos(std::move(other.chunk_infos))
|
, chunk_info(std::move(other.chunk_info))
|
||||||
{
|
{
|
||||||
other.num_rows = 0;
|
other.num_rows = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Chunk(Columns columns_, UInt64 num_rows_);
|
Chunk(Columns columns_, UInt64 num_rows_);
|
||||||
|
Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
|
||||||
Chunk(MutableColumns columns_, UInt64 num_rows_);
|
Chunk(MutableColumns columns_, UInt64 num_rows_);
|
||||||
|
Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_);
|
||||||
|
|
||||||
Chunk & operator=(const Chunk & other) = delete;
|
Chunk & operator=(const Chunk & other) = delete;
|
||||||
Chunk & operator=(Chunk && other) noexcept
|
Chunk & operator=(Chunk && other) noexcept
|
||||||
{
|
{
|
||||||
columns = std::move(other.columns);
|
columns = std::move(other.columns);
|
||||||
chunk_infos = std::move(other.chunk_infos);
|
chunk_info = std::move(other.chunk_info);
|
||||||
num_rows = other.num_rows;
|
num_rows = other.num_rows;
|
||||||
other.num_rows = 0;
|
other.num_rows = 0;
|
||||||
return *this;
|
return *this;
|
||||||
@ -82,15 +62,15 @@ public:
|
|||||||
void swap(Chunk & other) noexcept
|
void swap(Chunk & other) noexcept
|
||||||
{
|
{
|
||||||
columns.swap(other.columns);
|
columns.swap(other.columns);
|
||||||
|
chunk_info.swap(other.chunk_info);
|
||||||
std::swap(num_rows, other.num_rows);
|
std::swap(num_rows, other.num_rows);
|
||||||
chunk_infos.swap(other.chunk_infos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear()
|
void clear()
|
||||||
{
|
{
|
||||||
num_rows = 0;
|
num_rows = 0;
|
||||||
columns.clear();
|
columns.clear();
|
||||||
chunk_infos.clear();
|
chunk_info.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
const Columns & getColumns() const { return columns; }
|
const Columns & getColumns() const { return columns; }
|
||||||
@ -101,9 +81,9 @@ public:
|
|||||||
/** Get empty columns with the same types as in block. */
|
/** Get empty columns with the same types as in block. */
|
||||||
MutableColumns cloneEmptyColumns() const;
|
MutableColumns cloneEmptyColumns() const;
|
||||||
|
|
||||||
ChunkInfoCollection & getChunkInfos() { return chunk_infos; }
|
const ChunkInfoPtr & getChunkInfo() const { return chunk_info; }
|
||||||
const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; }
|
bool hasChunkInfo() const { return chunk_info != nullptr; }
|
||||||
void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); }
|
void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); }
|
||||||
|
|
||||||
UInt64 getNumRows() const { return num_rows; }
|
UInt64 getNumRows() const { return num_rows; }
|
||||||
UInt64 getNumColumns() const { return columns.size(); }
|
UInt64 getNumColumns() const { return columns.size(); }
|
||||||
@ -127,7 +107,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
Columns columns;
|
Columns columns;
|
||||||
UInt64 num_rows = 0;
|
UInt64 num_rows = 0;
|
||||||
ChunkInfoCollection chunk_infos;
|
ChunkInfoPtr chunk_info;
|
||||||
|
|
||||||
void checkNumRowsIsConsistent();
|
void checkNumRowsIsConsistent();
|
||||||
};
|
};
|
||||||
@ -137,15 +117,11 @@ using Chunks = std::vector<Chunk>;
|
|||||||
/// AsyncInsert needs two kinds of information:
|
/// AsyncInsert needs two kinds of information:
|
||||||
/// - offsets of different sub-chunks
|
/// - offsets of different sub-chunks
|
||||||
/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`.
|
/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`.
|
||||||
class AsyncInsertInfo : public ChunkInfoCloneable<AsyncInsertInfo>
|
class AsyncInsertInfo : public ChunkInfo
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
AsyncInsertInfo() = default;
|
AsyncInsertInfo() = default;
|
||||||
AsyncInsertInfo(const AsyncInsertInfo & other) = default;
|
explicit AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_) : offsets(offsets_), tokens(tokens_) {}
|
||||||
AsyncInsertInfo(const std::vector<size_t> & offsets_, const std::vector<String> & tokens_)
|
|
||||||
: offsets(offsets_)
|
|
||||||
, tokens(tokens_)
|
|
||||||
{}
|
|
||||||
|
|
||||||
std::vector<size_t> offsets;
|
std::vector<size_t> offsets;
|
||||||
std::vector<String> tokens;
|
std::vector<String> tokens;
|
||||||
@ -154,11 +130,9 @@ public:
|
|||||||
using AsyncInsertInfoPtr = std::shared_ptr<AsyncInsertInfo>;
|
using AsyncInsertInfoPtr = std::shared_ptr<AsyncInsertInfo>;
|
||||||
|
|
||||||
/// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
|
/// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults.
|
||||||
class ChunkMissingValues : public ChunkInfoCloneable<ChunkMissingValues>
|
class ChunkMissingValues : public ChunkInfo
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ChunkMissingValues(const ChunkMissingValues & other) = default;
|
|
||||||
|
|
||||||
using RowsBitMask = std::vector<bool>; /// a bit per row for a column
|
using RowsBitMask = std::vector<bool>; /// a bit per row for a column
|
||||||
|
|
||||||
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
|
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
|
||||||
|
@ -147,10 +147,13 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds)
|
|||||||
|
|
||||||
block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
||||||
|
|
||||||
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
if (auto chunk_info = chunk.getChunkInfo())
|
||||||
{
|
{
|
||||||
block.info.bucket_num = agg_info->bucket_num;
|
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(chunk_info.get()))
|
||||||
block.info.is_overflows = agg_info->is_overflows;
|
{
|
||||||
|
block.info.bucket_num = agg_info->bucket_num;
|
||||||
|
block.info.is_overflows = agg_info->is_overflows;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -73,10 +73,13 @@ bool PullingPipelineExecutor::pull(Block & block)
|
|||||||
}
|
}
|
||||||
|
|
||||||
block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns());
|
||||||
if (auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
if (auto chunk_info = chunk.getChunkInfo())
|
||||||
{
|
{
|
||||||
block.info.bucket_num = agg_info->bucket_num;
|
if (const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(chunk_info.get()))
|
||||||
block.info.is_overflows = agg_info->is_overflows;
|
{
|
||||||
|
block.info.bucket_num = agg_info->bucket_num;
|
||||||
|
block.info.is_overflows = agg_info->is_overflows;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -179,9 +179,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
|
|||||||
columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count);
|
columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count);
|
||||||
|
|
||||||
Chunks piece;
|
Chunks piece;
|
||||||
piece.emplace_back(std::move(columns), count);
|
piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo());
|
||||||
piece.back().setChunkInfos(concatenated.getChunkInfos());
|
|
||||||
|
|
||||||
writeRowGroup(std::move(piece));
|
writeRowGroup(std::move(piece));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8,9 +8,8 @@ namespace ErrorCodes
|
|||||||
}
|
}
|
||||||
|
|
||||||
IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header)
|
IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header)
|
||||||
: IProcessor({std::move(input_header)}, {std::move(output_header)})
|
: IProcessor({std::move(input_header)}, {std::move(output_header)}),
|
||||||
, input(inputs.front())
|
input(inputs.front()), output(outputs.front())
|
||||||
, output(outputs.front())
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,11 +53,13 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num
|
|||||||
if (!input.chunk.hasRows())
|
if (!input.chunk.hasRows())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (input.chunk.getChunkInfos().empty())
|
const auto & info = input.chunk.getChunkInfo();
|
||||||
|
if (!info)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm");
|
||||||
|
|
||||||
Int64 allocated_bytes = 0;
|
Int64 allocated_bytes = 0;
|
||||||
if (auto arenas_info = input.chunk.getChunkInfos().get<ChunkInfoWithAllocatedBytes>())
|
/// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator.
|
||||||
|
if (const auto * arenas_info = typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get()))
|
||||||
allocated_bytes = arenas_info->allocated_bytes;
|
allocated_bytes = arenas_info->allocated_bytes;
|
||||||
|
|
||||||
states[source_num] = State{input.chunk, description, allocated_bytes};
|
states[source_num] = State{input.chunk, description, allocated_bytes};
|
||||||
@ -134,7 +136,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge()
|
|||||||
info->chunk_num = chunk_num++;
|
info->chunk_num = chunk_num++;
|
||||||
|
|
||||||
Chunk chunk;
|
Chunk chunk;
|
||||||
chunk.getChunkInfos().add(std::move(info));
|
chunk.setChunkInfo(std::move(info));
|
||||||
return chunk;
|
return chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -161,7 +163,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation()
|
|||||||
chunks.emplace_back(std::move(new_columns), current_rows);
|
chunks.emplace_back(std::move(new_columns), current_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
chunks.back().getChunkInfos().add(std::make_shared<AggregatedChunkInfo>());
|
chunks.back().setChunkInfo(std::make_shared<AggregatedChunkInfo>());
|
||||||
states[i].current_row = states[i].to_row;
|
states[i].current_row = states[i].to_row;
|
||||||
|
|
||||||
/// We assume that sizes in bytes of rows are almost the same.
|
/// We assume that sizes in bytes of rows are almost the same.
|
||||||
|
@ -6,22 +6,18 @@ namespace DB
|
|||||||
{
|
{
|
||||||
|
|
||||||
/// To carry part level if chunk is produced by a merge tree source
|
/// To carry part level if chunk is produced by a merge tree source
|
||||||
class MergeTreePartLevelInfo : public ChunkInfoCloneable<MergeTreePartLevelInfo>
|
class MergeTreePartLevelInfo : public ChunkInfo
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MergeTreePartLevelInfo() = delete;
|
MergeTreePartLevelInfo() = delete;
|
||||||
explicit MergeTreePartLevelInfo(ssize_t part_level)
|
explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { }
|
||||||
: origin_merge_tree_part_level(part_level)
|
|
||||||
{ }
|
|
||||||
MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default;
|
|
||||||
|
|
||||||
size_t origin_merge_tree_part_level = 0;
|
size_t origin_merge_tree_part_level = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline size_t getPartLevelFromChunk(const Chunk & chunk)
|
inline size_t getPartLevelFromChunk(const Chunk & chunk)
|
||||||
{
|
{
|
||||||
const auto part_level_info = chunk.getChunkInfos().get<MergeTreePartLevelInfo>();
|
const auto & info = chunk.getChunkInfo();
|
||||||
if (part_level_info)
|
if (const auto * part_level_info = typeid_cast<const MergeTreePartLevelInfo *>(info.get()))
|
||||||
return part_level_info->origin_merge_tree_part_level;
|
return part_level_info->origin_merge_tree_part_level;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false)
|
static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false)
|
||||||
{
|
{
|
||||||
chunk->getChunkInfos().add(std::make_shared<ChunkSelectFinalIndices>(std::move(chunk->replace_final_selection)));
|
chunk->setChunkInfo(std::make_shared<ChunkSelectFinalIndices>(std::move(chunk->replace_final_selection)));
|
||||||
return IMergingAlgorithm::Status(std::move(*chunk), finished);
|
return IMergingAlgorithm::Status(std::move(*chunk), finished);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
#include <Processors/Merges/Algorithms/MergedData.h>
|
#include <Processors/Merges/Algorithms/MergedData.h>
|
||||||
#include <Processors/Transforms/ColumnGathererTransform.h>
|
#include <Processors/Transforms/ColumnGathererTransform.h>
|
||||||
#include <Processors/Merges/Algorithms/RowRef.h>
|
#include <Processors/Merges/Algorithms/RowRef.h>
|
||||||
#include <Processors/Chunk.h>
|
|
||||||
|
|
||||||
namespace Poco
|
namespace Poco
|
||||||
{
|
{
|
||||||
@ -15,13 +14,11 @@ namespace DB
|
|||||||
|
|
||||||
/** Use in skipping final to keep list of indices of selected row after merging final
|
/** Use in skipping final to keep list of indices of selected row after merging final
|
||||||
*/
|
*/
|
||||||
struct ChunkSelectFinalIndices : public ChunkInfoCloneable<ChunkSelectFinalIndices>
|
struct ChunkSelectFinalIndices : public ChunkInfo
|
||||||
{
|
{
|
||||||
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_);
|
|
||||||
ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default;
|
|
||||||
|
|
||||||
const ColumnPtr column_holder;
|
const ColumnPtr column_holder;
|
||||||
const ColumnUInt64 * select_final_indices = nullptr;
|
const ColumnUInt64 * select_final_indices = nullptr;
|
||||||
|
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_);
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Merges several sorted inputs into one.
|
/** Merges several sorted inputs into one.
|
||||||
|
@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare()
|
|||||||
bool is_port_full = !output.canPush();
|
bool is_port_full = !output.canPush();
|
||||||
|
|
||||||
/// Push if has data.
|
/// Push if has data.
|
||||||
if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full)
|
if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full)
|
||||||
output.push(std::move(state.output_chunk));
|
output.push(std::move(state.output_chunk));
|
||||||
|
|
||||||
if (!is_initialized)
|
if (!is_initialized)
|
||||||
|
@ -129,7 +129,7 @@ public:
|
|||||||
|
|
||||||
IMergingAlgorithm::Status status = algorithm.merge();
|
IMergingAlgorithm::Status status = algorithm.merge();
|
||||||
|
|
||||||
if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty())
|
if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo())
|
||||||
{
|
{
|
||||||
// std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl;
|
// std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl;
|
||||||
state.output_chunk = std::move(status.chunk);
|
state.output_chunk = std::move(status.chunk);
|
||||||
|
@ -20,7 +20,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
String getName() const override { return "RemoteSink"; }
|
String getName() const override { return "RemoteSink"; }
|
||||||
void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); }
|
void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); }
|
||||||
void onFinish() override { RemoteInserter::onFinish(); }
|
void onFinish() override { RemoteInserter::onFinish(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -15,8 +15,9 @@ void SinkToStorage::onConsume(Chunk chunk)
|
|||||||
*/
|
*/
|
||||||
Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns()));
|
Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns()));
|
||||||
|
|
||||||
consume(chunk);
|
consume(chunk.clone());
|
||||||
cur_chunk = std::move(chunk);
|
if (!lastBlockIsDuplicate())
|
||||||
|
cur_chunk = std::move(chunk);
|
||||||
}
|
}
|
||||||
|
|
||||||
SinkToStorage::GenerateResult SinkToStorage::onGenerate()
|
SinkToStorage::GenerateResult SinkToStorage::onGenerate()
|
||||||
|
@ -18,7 +18,8 @@ public:
|
|||||||
void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }
|
void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void consume(Chunk & chunk) = 0;
|
virtual void consume(Chunk chunk) = 0;
|
||||||
|
virtual bool lastBlockIsDuplicate() const { return false; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<TableLockHolder> table_locks;
|
std::vector<TableLockHolder> table_locks;
|
||||||
@ -37,7 +38,7 @@ class NullSinkToStorage : public SinkToStorage
|
|||||||
public:
|
public:
|
||||||
using SinkToStorage::SinkToStorage;
|
using SinkToStorage::SinkToStorage;
|
||||||
std::string getName() const override { return "NullSinkToStorage"; }
|
std::string getName() const override { return "NullSinkToStorage"; }
|
||||||
void consume(Chunk &) override {}
|
void consume(Chunk) override {}
|
||||||
};
|
};
|
||||||
|
|
||||||
using SinkPtr = std::shared_ptr<SinkToStorage>;
|
using SinkPtr = std::shared_ptr<SinkToStorage>;
|
||||||
|
@ -43,10 +43,7 @@ protected:
|
|||||||
info->bucket_num = res.info.bucket_num;
|
info->bucket_num = res.info.bucket_num;
|
||||||
info->is_overflows = res.info.is_overflows;
|
info->is_overflows = res.info.is_overflows;
|
||||||
|
|
||||||
auto chunk = Chunk(res.getColumns(), res.rows());
|
return Chunk(res.getColumns(), res.rows(), std::move(info));
|
||||||
chunk.getChunkInfos().add(std::move(info));
|
|
||||||
|
|
||||||
return chunk;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -176,7 +176,7 @@ std::optional<Chunk> RemoteSource::tryGenerate()
|
|||||||
auto info = std::make_shared<AggregatedChunkInfo>();
|
auto info = std::make_shared<AggregatedChunkInfo>();
|
||||||
info->bucket_num = block.info.bucket_num;
|
info->bucket_num = block.info.bucket_num;
|
||||||
info->is_overflows = block.info.is_overflows;
|
info->is_overflows = block.info.is_overflows;
|
||||||
chunk.getChunkInfos().add(std::move(info));
|
chunk.setChunkInfo(std::move(info));
|
||||||
}
|
}
|
||||||
|
|
||||||
return chunk;
|
return chunk;
|
||||||
|
@ -5,9 +5,7 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_))
|
SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {}
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows())
|
SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows())
|
||||||
{
|
{
|
||||||
@ -22,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp
|
|||||||
auto info = std::make_shared<AggregatedChunkInfo>();
|
auto info = std::make_shared<AggregatedChunkInfo>();
|
||||||
info->bucket_num = data.info.bucket_num;
|
info->bucket_num = data.info.bucket_num;
|
||||||
info->is_overflows = data.info.is_overflows;
|
info->is_overflows = data.info.is_overflows;
|
||||||
chunk.getChunkInfos().add(std::move(info));
|
chunk.setChunkInfo(std::move(info));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate()
|
|||||||
variants.aggregates_pool = variants.aggregates_pools.at(0).get();
|
variants.aggregates_pool = variants.aggregates_pools.at(0).get();
|
||||||
|
|
||||||
/// Pass info about used memory by aggregate functions further.
|
/// Pass info about used memory by aggregate functions further.
|
||||||
to_push_chunk.getChunkInfos().add(std::make_shared<ChunkInfoWithAllocatedBytes>(cur_block_bytes));
|
to_push_chunk.setChunkInfo(std::make_shared<ChunkInfoWithAllocatedBytes>(cur_block_bytes));
|
||||||
|
|
||||||
cur_block_bytes = 0;
|
cur_block_bytes = 0;
|
||||||
cur_block_size = 0;
|
cur_block_size = 0;
|
||||||
@ -351,12 +351,11 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati
|
|||||||
void FinalizeAggregatedTransform::transform(Chunk & chunk)
|
void FinalizeAggregatedTransform::transform(Chunk & chunk)
|
||||||
{
|
{
|
||||||
if (params->final)
|
if (params->final)
|
||||||
{
|
|
||||||
finalizeChunk(chunk, aggregates_mask);
|
finalizeChunk(chunk, aggregates_mask);
|
||||||
}
|
else if (!chunk.getChunkInfo())
|
||||||
else if (!chunk.getChunkInfos().get<AggregatedChunkInfo>())
|
|
||||||
{
|
{
|
||||||
chunk.getChunkInfos().add(std::make_shared<AggregatedChunkInfo>());
|
auto info = std::make_shared<AggregatedChunkInfo>();
|
||||||
|
chunk.setChunkInfo(std::move(info));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,7 +5,6 @@
|
|||||||
#include <Processors/ISimpleTransform.h>
|
#include <Processors/ISimpleTransform.h>
|
||||||
#include <Processors/Transforms/AggregatingTransform.h>
|
#include <Processors/Transforms/AggregatingTransform.h>
|
||||||
#include <Processors/Transforms/finalizeChunk.h>
|
#include <Processors/Transforms/finalizeChunk.h>
|
||||||
#include <Processors/Chunk.h>
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -13,12 +12,10 @@ namespace DB
|
|||||||
struct InputOrderInfo;
|
struct InputOrderInfo;
|
||||||
using InputOrderInfoPtr = std::shared_ptr<const InputOrderInfo>;
|
using InputOrderInfoPtr = std::shared_ptr<const InputOrderInfo>;
|
||||||
|
|
||||||
struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable<ChunkInfoWithAllocatedBytes>
|
struct ChunkInfoWithAllocatedBytes : public ChunkInfo
|
||||||
{
|
{
|
||||||
ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default;
|
|
||||||
explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_)
|
explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_)
|
||||||
: allocated_bytes(allocated_bytes_) {}
|
: allocated_bytes(allocated_bytes_) {}
|
||||||
|
|
||||||
Int64 allocated_bytes;
|
Int64 allocated_bytes;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block)
|
|||||||
|
|
||||||
UInt64 num_rows = block.rows();
|
UInt64 num_rows = block.rows();
|
||||||
Chunk chunk(block.getColumns(), num_rows);
|
Chunk chunk(block.getColumns(), num_rows);
|
||||||
chunk.getChunkInfos().add(std::move(info));
|
chunk.setChunkInfo(std::move(info));
|
||||||
|
|
||||||
return chunk;
|
return chunk;
|
||||||
}
|
}
|
||||||
@ -44,11 +44,15 @@ namespace
|
|||||||
{
|
{
|
||||||
const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk)
|
const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk)
|
||||||
{
|
{
|
||||||
auto agg_info = chunk.getChunkInfos().get<AggregatedChunkInfo>();
|
const auto & info = chunk.getChunkInfo();
|
||||||
|
if (!info)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk.");
|
||||||
|
|
||||||
|
const auto * agg_info = typeid_cast<const AggregatedChunkInfo *>(info.get());
|
||||||
if (!agg_info)
|
if (!agg_info)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo.");
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo.");
|
||||||
|
|
||||||
return agg_info.get();
|
return agg_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reads chunks from file in native format. Provide chunks with aggregation info.
|
/// Reads chunks from file in native format. Provide chunks with aggregation info.
|
||||||
@ -206,7 +210,11 @@ private:
|
|||||||
|
|
||||||
void process(Chunk && chunk)
|
void process(Chunk && chunk)
|
||||||
{
|
{
|
||||||
auto chunks_to_merge = chunk.getChunkInfos().get<ChunksToMerge>();
|
if (!chunk.hasChunkInfo())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName());
|
||||||
|
|
||||||
|
const auto & info = chunk.getChunkInfo();
|
||||||
|
const auto * chunks_to_merge = typeid_cast<const ChunksToMerge *>(info.get());
|
||||||
if (!chunks_to_merge)
|
if (!chunks_to_merge)
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName());
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName());
|
||||||
|
|
||||||
@ -775,7 +783,7 @@ void AggregatingTransform::initGenerate()
|
|||||||
{
|
{
|
||||||
/// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes`
|
/// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes`
|
||||||
static constexpr size_t oneMB = 1024 * 1024;
|
static constexpr size_t oneMB = 1024 * 1024;
|
||||||
return std::make_shared<SimpleSquashingTransform>(header, params->params.max_block_size, oneMB);
|
return std::make_shared<SimpleSquashingChunksTransform>(header, params->params.max_block_size, oneMB);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
/// AggregatingTransform::expandPipeline expects single output port.
|
/// AggregatingTransform::expandPipeline expects single output port.
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
#include <Compression/CompressedReadBuffer.h>
|
#include <Compression/CompressedReadBuffer.h>
|
||||||
#include <IO/ReadBufferFromFile.h>
|
#include <IO/ReadBufferFromFile.h>
|
||||||
#include <Interpreters/Aggregator.h>
|
#include <Interpreters/Aggregator.h>
|
||||||
#include <Processors/Chunk.h>
|
|
||||||
#include <Processors/IAccumulatingTransform.h>
|
#include <Processors/IAccumulatingTransform.h>
|
||||||
#include <Common/Stopwatch.h>
|
#include <Common/Stopwatch.h>
|
||||||
#include <Common/setThreadName.h>
|
#include <Common/setThreadName.h>
|
||||||
@ -20,7 +19,7 @@ namespace CurrentMetrics
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
class AggregatedChunkInfo : public ChunkInfoCloneable<AggregatedChunkInfo>
|
class AggregatedChunkInfo : public ChunkInfo
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
bool is_overflows = false;
|
bool is_overflows = false;
|
||||||
|
@ -27,12 +27,18 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
ExceptionKeepingTransform::work();
|
ExceptionKeepingTransform::work();
|
||||||
|
if (finish_chunk)
|
||||||
|
{
|
||||||
|
data.chunk = std::move(finish_chunk);
|
||||||
|
ready_output = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void onConsume(Chunk chunk) override
|
void onConsume(Chunk chunk) override
|
||||||
{
|
{
|
||||||
cur_chunk = Squashing::squash(std::move(chunk));
|
if (auto res_chunk = DB::Squashing::squash(std::move(chunk)))
|
||||||
|
cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows());
|
||||||
}
|
}
|
||||||
|
|
||||||
GenerateResult onGenerate() override
|
GenerateResult onGenerate() override
|
||||||
@ -42,10 +48,16 @@ protected:
|
|||||||
res.is_done = true;
|
res.is_done = true;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
void onFinish() override
|
||||||
|
{
|
||||||
|
auto chunk = DB::Squashing::squash({});
|
||||||
|
finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows());
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Squashing squashing;
|
Squashing squashing;
|
||||||
Chunk cur_chunk;
|
Chunk cur_chunk;
|
||||||
|
Chunk finish_chunk;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#include <Processors/Transforms/CountingTransform.h>
|
|
||||||
|
|
||||||
#include <IO/Progress.h>
|
|
||||||
#include <Interpreters/ProcessList.h>
|
#include <Interpreters/ProcessList.h>
|
||||||
|
#include <Processors/Transforms/CountingTransform.h>
|
||||||
#include <Common/ProfileEvents.h>
|
#include <Common/ProfileEvents.h>
|
||||||
#include <Common/ThreadStatus.h>
|
#include <Common/ThreadStatus.h>
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user