Merge branch 'master' of github.com:ClickHouse/ClickHouse into fix-dynamic-subcolumns-in-analyzer
Commit 6b446ad31b
@ -34,14 +34,12 @@ curl https://clickhouse.com/ | sh

Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.

* [v24.6 Community Call](https://clickhouse.com/company/events/v24-6-community-release-call) - Jul 2
* [v24.7 Community Call](https://clickhouse.com/company/events/v24-7-community-release-call) - Jul 30

## Upcoming Events

Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.

* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26
* [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27
* [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9
* [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9
* [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9
@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING)
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
endif ()

message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}")
message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}")
endif ()

@ -125,7 +125,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in"
"${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY)

aws_get_version(AWS_CRT_CPP_VERSION_MAJOR AWS_CRT_CPP_VERSION_MINOR AWS_CRT_CPP_VERSION_PATCH FULL_VERSION GIT_HASH)
configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${AWS_CRT_DIR}/include/aws/crt/Config.h" @ONLY)
configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/crt/Config.h" @ONLY)

list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC})
2
contrib/orc
vendored
2
contrib/orc
vendored
@ -1 +1 @@
|
||||
Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f
|
||||
Subproject commit bcc025c09828c556f54cfbdf83a66b9acae7d17f
|
2
contrib/rocksdb
vendored
2
contrib/rocksdb
vendored
@ -1 +1 @@
|
||||
Subproject commit 3a0b80ca9d6eebb38fad7ea3f41dfc9db4f6a984
|
||||
Subproject commit 078fa5638690004e1f744076d1bdcc4e93767304
|
@ -1,7 +1,7 @@
|
||||
option (ENABLE_ROCKSDB "Enable rocksdb library" ${ENABLE_LIBRARIES})
|
||||
option (ENABLE_ROCKSDB "Enable RocksDB" ${ENABLE_LIBRARIES})
|
||||
|
||||
if (NOT ENABLE_ROCKSDB)
|
||||
message (STATUS "Not using rocksdb")
|
||||
message (STATUS "Not using RocksDB")
|
||||
return()
|
||||
endif()
|
||||
|
||||
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.6.1.4423"
|
||||
ARG VERSION="24.6.2.17"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
docker/reqgenerator.py (new file, 47 lines)
@ -0,0 +1,47 @@
#!/usr/bin/env python3
# To run this script you must install docker and the pipdeptree python package
#

import subprocess
import os
import sys


def build_docker_deps(image_name, imagedir):
    cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt"""
    subprocess.check_call(cmd, shell=True)


def check_docker_file_install_with_pip(filepath):
    image_name = None
    with open(filepath, "r") as f:
        for line in f:
            if "docker build" in line:
                arr = line.split(" ")
                if len(arr) > 4:
                    image_name = arr[4]
            if "pip3 install" in line or "pip install" in line:
                return image_name, True
    return image_name, False


def process_affected_images(images_dir):
    for root, _dirs, files in os.walk(images_dir):
        for f in files:
            if f == "Dockerfile":
                docker_file_path = os.path.join(root, f)
                print("Checking image on path", docker_file_path)
                image_name, has_pip = check_docker_file_install_with_pip(
                    docker_file_path
                )
                if has_pip:
                    print("Find pip in", image_name)
                    try:
                        build_docker_deps(image_name, root)
                    except Exception as ex:
                        print(ex)
                else:
                    print("Pip not found in", docker_file_path)


process_affected_images(sys.argv[1])
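A minimal sketch of how this generator might be invoked, for context only; the `docker/test` path and the wrapper itself are assumptions, not part of this commit:

```python
# Hypothetical usage sketch: regenerate the pinned requirements.txt next to every
# Dockerfile under docker/test that installs packages with pip.
import subprocess
import sys

IMAGES_DIR = "docker/test"  # assumed location of the Dockerfiles to scan

if __name__ == "__main__":
    try:
        subprocess.check_call([sys.executable, "docker/reqgenerator.py", IMAGES_DIR])
    except subprocess.CalledProcessError as err:
        sys.exit(f"requirements generation failed: {err}")
```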
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.6.1.4423"
|
||||
ARG VERSION="24.6.2.17"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="24.6.1.4423"
|
||||
ARG VERSION="24.6.2.17"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
#docker-official-library:off
|
||||
|
@ -19,10 +19,7 @@ RUN apt-get update \
|
||||
odbcinst \
|
||||
psmisc \
|
||||
python3 \
|
||||
python3-lxml \
|
||||
python3-pip \
|
||||
python3-requests \
|
||||
python3-termcolor \
|
||||
unixodbc \
|
||||
pv \
|
||||
jq \
|
||||
@ -31,7 +28,8 @@ RUN apt-get update \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3
|
||||
COPY requirements.txt /
|
||||
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||
|
||||
# This symlink is required by gcc to find the lld linker
|
||||
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
|
||||
@ -39,6 +37,10 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
|
||||
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
|
||||
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake
|
||||
|
||||
# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot catch up libraries from default install path.
|
||||
# It's very dirty workaround, better to build compiler and LLVM ourself and use it. Details: https://github.com/llvm/llvm-project/issues/95792
|
||||
RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu
|
||||
|
||||
ARG CCACHE_VERSION=4.6.1
|
||||
RUN mkdir /tmp/ccache \
|
||||
&& cd /tmp/ccache \
|
||||
|
41
docker/test/fasttest/requirements.txt
Normal file
41
docker/test/fasttest/requirements.txt
Normal file
@ -0,0 +1,41 @@
|
||||
Jinja2==3.1.3
|
||||
MarkupSafe==2.1.5
|
||||
PyJWT==2.3.0
|
||||
PyYAML==6.0.1
|
||||
Pygments==2.11.2
|
||||
SecretStorage==3.3.1
|
||||
blinker==1.4
|
||||
certifi==2020.6.20
|
||||
chardet==4.0.0
|
||||
cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
distro==1.7.0
|
||||
httplib2==0.20.2
|
||||
idna==3.3
|
||||
importlib-metadata==4.6.4
|
||||
jeepney==0.7.1
|
||||
keyring==23.5.0
|
||||
launchpadlib==1.10.16
|
||||
lazr.restfulclient==0.14.4
|
||||
lazr.uri==1.0.6
|
||||
lxml==4.8.0
|
||||
more-itertools==8.10.0
|
||||
numpy==1.26.3
|
||||
oauthlib==3.2.0
|
||||
packaging==24.1
|
||||
pandas==1.5.3
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
pyparsing==2.4.7
|
||||
python-apt==2.4.0+ubuntu3
|
||||
python-dateutil==2.9.0.post0
|
||||
pytz==2024.1
|
||||
requests==2.32.3
|
||||
scipy==1.12.0
|
||||
setuptools==59.6.0
|
||||
six==1.16.0
|
||||
termcolor==1.1.0
|
||||
urllib3==1.26.5
|
||||
wadllib==1.3.6
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
@ -84,6 +84,8 @@ function start_server
|
||||
echo "ClickHouse server pid '$server_pid' started and responded"
|
||||
}
|
||||
|
||||
export -f start_server
|
||||
|
||||
function clone_root
|
||||
{
|
||||
[ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE"
|
||||
@ -254,6 +256,19 @@ function configure
|
||||
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
|
||||
}
|
||||
|
||||
function timeout_with_logging() {
|
||||
local exit_code=0
|
||||
|
||||
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
|
||||
|
||||
if [[ "${exit_code}" -eq "124" ]]
|
||||
then
|
||||
echo "The command 'timeout ${*}' has been killed by timeout"
|
||||
fi
|
||||
|
||||
return $exit_code
|
||||
}
|
||||
|
||||
function run_tests
|
||||
{
|
||||
clickhouse-server --version
|
||||
@ -292,6 +307,8 @@ function run_tests
|
||||
clickhouse stop --pid-path "$FASTTEST_DATA"
|
||||
}
|
||||
|
||||
export -f run_tests
|
||||
|
||||
case "$stage" in
|
||||
"")
|
||||
ls -la
|
||||
@ -315,7 +332,7 @@ case "$stage" in
|
||||
configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
|
||||
;&
|
||||
"run_tests")
|
||||
run_tests
|
||||
timeout_with_logging 35m bash -c run_tests ||:
|
||||
/process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \
|
||||
--out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \
|
||||
--out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv"
|
||||
|
@ -31,7 +31,8 @@ RUN apt-get update \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
RUN pip3 install Jinja2
|
||||
COPY requirements.txt /
|
||||
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||
|
||||
COPY * /
|
||||
|
||||
|
27
docker/test/fuzzer/requirements.txt
Normal file
27
docker/test/fuzzer/requirements.txt
Normal file
@ -0,0 +1,27 @@
|
||||
blinker==1.4
|
||||
cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
distro==1.7.0
|
||||
httplib2==0.20.2
|
||||
importlib-metadata==4.6.4
|
||||
jeepney==0.7.1
|
||||
Jinja2==3.1.4
|
||||
keyring==23.5.0
|
||||
launchpadlib==1.10.16
|
||||
lazr.restfulclient==0.14.4
|
||||
lazr.uri==1.0.6
|
||||
MarkupSafe==2.1.5
|
||||
more-itertools==8.10.0
|
||||
oauthlib==3.2.0
|
||||
packaging==24.1
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
PyJWT==2.3.0
|
||||
pyparsing==2.4.7
|
||||
python-apt==2.4.0+ubuntu3
|
||||
SecretStorage==3.3.1
|
||||
setuptools==59.6.0
|
||||
six==1.16.0
|
||||
wadllib==1.3.6
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
@ -33,7 +33,8 @@ RUN apt-get update \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
RUN pip3 install pycurl
|
||||
COPY requirements.txt /
|
||||
RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip
|
||||
|
||||
# Architecture of the image when BuildKit/buildx is used
|
||||
ARG TARGETARCH
|
||||
|
26
docker/test/integration/base/requirements.txt
Normal file
26
docker/test/integration/base/requirements.txt
Normal file
@ -0,0 +1,26 @@
|
||||
blinker==1.4
|
||||
cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
distro==1.7.0
|
||||
httplib2==0.20.2
|
||||
importlib-metadata==4.6.4
|
||||
jeepney==0.7.1
|
||||
keyring==23.5.0
|
||||
launchpadlib==1.10.16
|
||||
lazr.restfulclient==0.14.4
|
||||
lazr.uri==1.0.6
|
||||
more-itertools==8.10.0
|
||||
oauthlib==3.2.0
|
||||
packaging==24.1
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
pycurl==7.45.3
|
||||
PyJWT==2.3.0
|
||||
pyparsing==2.4.7
|
||||
python-apt==2.4.0+ubuntu3
|
||||
SecretStorage==3.3.1
|
||||
setuptools==59.6.0
|
||||
six==1.16.0
|
||||
wadllib==1.3.6
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
@ -2,4 +2,5 @@
|
||||
# Helper docker container to run python bottle apps
|
||||
|
||||
FROM python:3
|
||||
RUN python -m pip install bottle
|
||||
COPY requirements.txt /
|
||||
RUN python -m pip install --no-cache-dir -r requirements.txt
|
||||
|
6
docker/test/integration/resolver/requirements.txt
Normal file
6
docker/test/integration/resolver/requirements.txt
Normal file
@ -0,0 +1,6 @@
|
||||
bottle==0.12.25
|
||||
packaging==24.1
|
||||
pip==23.2.1
|
||||
pipdeptree==2.23.0
|
||||
setuptools==69.0.3
|
||||
wheel==0.42.0
|
@ -26,7 +26,6 @@ RUN apt-get update \
|
||||
libicu-dev \
|
||||
bsdutils \
|
||||
curl \
|
||||
python3-pika \
|
||||
liblua5.1-dev \
|
||||
luajit \
|
||||
libssl-dev \
|
||||
@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
|
||||
|
||||
# kazoo 2.10.0 is broken
|
||||
# https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html
|
||||
RUN python3 -m pip install --no-cache-dir \
|
||||
PyMySQL==1.1.0 \
|
||||
asyncio==3.4.3 \
|
||||
avro==1.10.2 \
|
||||
azure-storage-blob==12.19.0 \
|
||||
boto3==1.34.24 \
|
||||
cassandra-driver==3.29.0 \
|
||||
confluent-kafka==2.3.0 \
|
||||
delta-spark==2.3.0 \
|
||||
dict2xml==1.7.4 \
|
||||
dicttoxml==1.7.16 \
|
||||
docker==6.1.3 \
|
||||
docker-compose==1.29.2 \
|
||||
grpcio==1.60.0 \
|
||||
grpcio-tools==1.60.0 \
|
||||
kafka-python==2.0.2 \
|
||||
lz4==4.3.3 \
|
||||
minio==7.2.3 \
|
||||
nats-py==2.6.0 \
|
||||
protobuf==4.25.2 \
|
||||
kazoo==2.9.0 \
|
||||
psycopg2-binary==2.9.6 \
|
||||
pyhdfs==0.3.1 \
|
||||
pymongo==3.11.0 \
|
||||
pyspark==3.3.2 \
|
||||
pytest==7.4.4 \
|
||||
pytest-order==1.0.0 \
|
||||
pytest-random==0.2 \
|
||||
pytest-repeat==0.9.3 \
|
||||
pytest-timeout==2.2.0 \
|
||||
pytest-xdist==3.5.0 \
|
||||
pytest-reportlog==0.4.0 \
|
||||
pytz==2023.3.post1 \
|
||||
pyyaml==5.3.1 \
|
||||
redis==5.0.1 \
|
||||
requests-kerberos==0.14.0 \
|
||||
tzlocal==2.1 \
|
||||
retry==0.9.2 \
|
||||
bs4==0.0.2 \
|
||||
lxml==5.1.0 \
|
||||
urllib3==2.0.7 \
|
||||
jwcrypto==1.5.6
|
||||
# bs4, lxml are for cloud tests, do not delete
|
||||
COPY requirements.txt /
|
||||
RUN python3 -m pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Hudi supports only spark 3.3.*, not 3.4
|
||||
RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \
|
||||
|
113
docker/test/integration/runner/requirements.txt
Normal file
113
docker/test/integration/runner/requirements.txt
Normal file
@ -0,0 +1,113 @@
|
||||
PyHDFS==0.3.1
|
||||
PyJWT==2.3.0
|
||||
PyMySQL==1.1.0
|
||||
PyNaCl==1.5.0
|
||||
PyYAML==5.3.1
|
||||
SecretStorage==3.3.1
|
||||
argon2-cffi-bindings==21.2.0
|
||||
argon2-cffi==23.1.0
|
||||
async-timeout==4.0.3
|
||||
asyncio==3.4.3
|
||||
attrs==23.2.0
|
||||
avro==1.10.2
|
||||
azure-core==1.30.1
|
||||
azure-storage-blob==12.19.0
|
||||
bcrypt==4.1.3
|
||||
beautifulsoup4==4.12.3
|
||||
blinker==1.4
|
||||
boto3==1.34.24
|
||||
botocore==1.34.101
|
||||
bs4==0.0.2
|
||||
cassandra-driver==3.29.0
|
||||
certifi==2024.2.2
|
||||
cffi==1.16.0
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
confluent-kafka==2.3.0
|
||||
cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
decorator==5.1.1
|
||||
delta-spark==2.3.0
|
||||
dict2xml==1.7.4
|
||||
dicttoxml==1.7.16
|
||||
distro-info==1.1+ubuntu0.2
|
||||
distro==1.7.0
|
||||
docker-compose==1.29.2
|
||||
docker==6.1.3
|
||||
dockerpty==0.4.1
|
||||
docopt==0.6.2
|
||||
exceptiongroup==1.2.1
|
||||
execnet==2.1.1
|
||||
geomet==0.2.1.post1
|
||||
grpcio-tools==1.60.0
|
||||
grpcio==1.60.0
|
||||
gssapi==1.8.3
|
||||
httplib2==0.20.2
|
||||
idna==3.7
|
||||
importlib-metadata==4.6.4
|
||||
iniconfig==2.0.0
|
||||
isodate==0.6.1
|
||||
jeepney==0.7.1
|
||||
jmespath==1.0.1
|
||||
jsonschema==3.2.0
|
||||
jwcrypto==1.5.6
|
||||
kafka-python==2.0.2
|
||||
kazoo==2.9.0
|
||||
keyring==23.5.0
|
||||
krb5==0.5.1
|
||||
launchpadlib==1.10.16
|
||||
lazr.restfulclient==0.14.4
|
||||
lazr.uri==1.0.6
|
||||
lxml==5.1.0
|
||||
lz4==4.3.3
|
||||
minio==7.2.3
|
||||
more-itertools==8.10.0
|
||||
nats-py==2.6.0
|
||||
oauthlib==3.2.0
|
||||
packaging==24.0
|
||||
paramiko==3.4.0
|
||||
pika==1.2.0
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
pluggy==1.5.0
|
||||
protobuf==4.25.2
|
||||
psycopg2-binary==2.9.6
|
||||
py4j==0.10.9.5
|
||||
py==1.11.0
|
||||
pycparser==2.22
|
||||
pycryptodome==3.20.0
|
||||
pymongo==3.11.0
|
||||
pyparsing==2.4.7
|
||||
pyrsistent==0.20.0
|
||||
pyspark==3.3.2
|
||||
pyspnego==0.10.2
|
||||
pytest-order==1.0.0
|
||||
pytest-random==0.2
|
||||
pytest-repeat==0.9.3
|
||||
pytest-reportlog==0.4.0
|
||||
pytest-timeout==2.2.0
|
||||
pytest-xdist==3.5.0
|
||||
pytest==7.4.4
|
||||
python-apt==2.4.0+ubuntu3
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==0.21.1
|
||||
pytz==2023.3.post1
|
||||
redis==5.0.1
|
||||
requests-kerberos==0.14.0
|
||||
requests==2.31.0
|
||||
retry==0.9.2
|
||||
s3transfer==0.10.1
|
||||
setuptools==59.6.0
|
||||
simplejson==3.19.2
|
||||
six==1.16.0
|
||||
soupsieve==2.5
|
||||
texttable==1.7.0
|
||||
tomli==2.0.1
|
||||
typing_extensions==4.11.0
|
||||
tzlocal==2.1
|
||||
unattended-upgrades==0.1
|
||||
urllib3==2.0.7
|
||||
wadllib==1.3.6
|
||||
websocket-client==0.59.0
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
@ -1,3 +1,4 @@
|
||||
# docker build -t clickhouse/libfuzzer .
|
||||
ARG FROM_TAG=latest
|
||||
FROM clickhouse/test-base:$FROM_TAG
|
||||
|
||||
@ -29,7 +30,8 @@ RUN apt-get update \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
RUN pip3 install Jinja2
|
||||
COPY requirements.txt /
|
||||
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||
|
||||
COPY * /
|
||||
|
||||
|
27
docker/test/libfuzzer/requirements.txt
Normal file
27
docker/test/libfuzzer/requirements.txt
Normal file
@ -0,0 +1,27 @@
|
||||
blinker==1.4
|
||||
cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
distro==1.7.0
|
||||
httplib2==0.20.2
|
||||
importlib-metadata==4.6.4
|
||||
jeepney==0.7.1
|
||||
Jinja2==3.1.4
|
||||
keyring==23.5.0
|
||||
launchpadlib==1.10.16
|
||||
lazr.restfulclient==0.14.4
|
||||
lazr.uri==1.0.6
|
||||
MarkupSafe==2.1.5
|
||||
more-itertools==8.10.0
|
||||
oauthlib==3.2.0
|
||||
packaging==24.1
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
PyJWT==2.3.0
|
||||
pyparsing==2.4.7
|
||||
python-apt==2.4.0+ubuntu3
|
||||
SecretStorage==3.3.1
|
||||
setuptools==59.6.0
|
||||
six==1.16.0
|
||||
wadllib==1.3.6
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
@ -23,7 +23,6 @@ RUN apt-get update \
|
||||
python3 \
|
||||
python3-dev \
|
||||
python3-pip \
|
||||
python3-setuptools \
|
||||
rsync \
|
||||
tree \
|
||||
tzdata \
|
||||
@ -33,12 +32,14 @@ RUN apt-get update \
|
||||
cargo \
|
||||
ripgrep \
|
||||
zstd \
|
||||
&& pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \
|
||||
&& apt-get purge --yes python3-dev g++ \
|
||||
&& apt-get autoremove --yes \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
COPY requirements.txt /
|
||||
RUN pip3 --no-cache-dir install -r requirements.txt
|
||||
|
||||
COPY run.sh /
|
||||
|
||||
CMD ["bash", "/run.sh"]
|
||||
|
32
docker/test/performance-comparison/requirements.txt
Normal file
32
docker/test/performance-comparison/requirements.txt
Normal file
@ -0,0 +1,32 @@
|
||||
blinker==1.4
|
||||
clickhouse-driver==0.2.7
|
||||
cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
distro==1.7.0
|
||||
httplib2==0.20.2
|
||||
importlib-metadata==4.6.4
|
||||
jeepney==0.7.1
|
||||
keyring==23.5.0
|
||||
launchpadlib==1.10.16
|
||||
lazr.restfulclient==0.14.4
|
||||
lazr.uri==1.0.6
|
||||
more-itertools==8.10.0
|
||||
numpy==1.26.3
|
||||
oauthlib==3.2.0
|
||||
packaging==24.1
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
Pygments==2.11.2
|
||||
PyJWT==2.3.0
|
||||
pyparsing==2.4.7
|
||||
python-apt==2.4.0+ubuntu3
|
||||
pytz==2023.4
|
||||
PyYAML==6.0.1
|
||||
scipy==1.12.0
|
||||
SecretStorage==3.3.1
|
||||
setuptools==59.6.0
|
||||
six==1.16.0
|
||||
tzlocal==2.1
|
||||
wadllib==1.3.6
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
@ -18,11 +18,8 @@ RUN apt-get update --yes \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
RUN pip3 install \
|
||||
numpy \
|
||||
pyodbc \
|
||||
deepdiff \
|
||||
sqlglot
|
||||
COPY requirements.txt /
|
||||
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||
|
||||
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz"
|
||||
|
||||
|
30
docker/test/sqllogic/requirements.txt
Normal file
30
docker/test/sqllogic/requirements.txt
Normal file
@ -0,0 +1,30 @@
|
||||
blinker==1.4
|
||||
cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
deepdiff==7.0.1
|
||||
distro==1.7.0
|
||||
httplib2==0.20.2
|
||||
importlib-metadata==4.6.4
|
||||
jeepney==0.7.1
|
||||
keyring==23.5.0
|
||||
launchpadlib==1.10.16
|
||||
lazr.restfulclient==0.14.4
|
||||
lazr.uri==1.0.6
|
||||
more-itertools==8.10.0
|
||||
numpy==1.26.4
|
||||
oauthlib==3.2.0
|
||||
ordered-set==4.1.0
|
||||
packaging==24.1
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
PyJWT==2.3.0
|
||||
pyodbc==5.1.0
|
||||
pyparsing==2.4.7
|
||||
python-apt==2.4.0+ubuntu3
|
||||
SecretStorage==3.3.1
|
||||
setuptools==59.6.0
|
||||
six==1.16.0
|
||||
sqlglot==23.16.0
|
||||
wadllib==1.3.6
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
@ -14,9 +14,8 @@ RUN apt-get update --yes \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
RUN pip3 install \
|
||||
pyyaml \
|
||||
clickhouse-driver
|
||||
COPY requirements.txt /
|
||||
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||
|
||||
ARG sqltest_repo="https://github.com/elliotchance/sqltest/"
|
||||
|
||||
|
29
docker/test/sqltest/requirements.txt
Normal file
29
docker/test/sqltest/requirements.txt
Normal file
@ -0,0 +1,29 @@
|
||||
blinker==1.4
|
||||
clickhouse-driver==0.2.7
|
||||
cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
distro==1.7.0
|
||||
httplib2==0.20.2
|
||||
importlib-metadata==4.6.4
|
||||
jeepney==0.7.1
|
||||
keyring==23.5.0
|
||||
launchpadlib==1.10.16
|
||||
lazr.restfulclient==0.14.4
|
||||
lazr.uri==1.0.6
|
||||
more-itertools==8.10.0
|
||||
oauthlib==3.2.0
|
||||
packaging==24.1
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
PyJWT==2.3.0
|
||||
pyparsing==2.4.7
|
||||
python-apt==2.4.0+ubuntu3
|
||||
pytz==2024.1
|
||||
PyYAML==6.0.1
|
||||
SecretStorage==3.3.1
|
||||
setuptools==59.6.0
|
||||
six==1.16.0
|
||||
tzlocal==5.2
|
||||
wadllib==1.3.6
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG
|
||||
RUN apt-get update -y \
|
||||
&& env DEBIAN_FRONTEND=noninteractive \
|
||||
apt-get install --yes --no-install-recommends \
|
||||
python3-requests \
|
||||
nodejs \
|
||||
npm \
|
||||
&& apt-get clean \
|
||||
|
@ -25,10 +25,7 @@ RUN apt-get update -y \
|
||||
openssl \
|
||||
postgresql-client \
|
||||
python3 \
|
||||
python3-lxml \
|
||||
python3-pip \
|
||||
python3-requests \
|
||||
python3-termcolor \
|
||||
qemu-user-static \
|
||||
sqlite3 \
|
||||
sudo \
|
||||
@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR
|
||||
&& unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \
|
||||
&& rm protoc-${PROTOC_VERSION}-linux-x86_64.zip
|
||||
|
||||
RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0
|
||||
COPY requirements.txt /
|
||||
RUN pip3 install --no-cache-dir -r /requirements.txt
|
||||
|
||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
&& cd /tmp/clickhouse-odbc-tmp \
|
||||
|
51
docker/test/stateless/requirements.txt
Normal file
51
docker/test/stateless/requirements.txt
Normal file
@ -0,0 +1,51 @@
|
||||
awscli==1.22.34
|
||||
blinker==1.4
|
||||
botocore==1.23.34
|
||||
certifi==2020.6.20
|
||||
chardet==4.0.0
|
||||
colorama==0.4.4
|
||||
cryptography==3.4.8
|
||||
dbus-python==1.2.18
|
||||
distro==1.7.0
|
||||
docutils==0.17.1
|
||||
gyp==0.1
|
||||
httplib2==0.20.2
|
||||
idna==3.3
|
||||
importlib-metadata==4.6.4
|
||||
jeepney==0.7.1
|
||||
Jinja2==3.1.3
|
||||
jmespath==0.10.0
|
||||
keyring==23.5.0
|
||||
launchpadlib==1.10.16
|
||||
lazr.restfulclient==0.14.4
|
||||
lazr.uri==1.0.6
|
||||
lxml==4.8.0
|
||||
MarkupSafe==2.1.5
|
||||
more-itertools==8.10.0
|
||||
numpy==1.26.3
|
||||
oauthlib==3.2.0
|
||||
packaging==24.1
|
||||
pandas==1.5.3
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
pyarrow==15.0.0
|
||||
pyasn1==0.4.8
|
||||
PyJWT==2.3.0
|
||||
pyparsing==2.4.7
|
||||
python-apt==2.4.0+ubuntu3
|
||||
python-dateutil==2.8.1
|
||||
pytz==2024.1
|
||||
PyYAML==6.0.1
|
||||
requests==2.32.3
|
||||
roman==3.3
|
||||
rsa==4.8
|
||||
s3transfer==0.5.0
|
||||
scipy==1.12.0
|
||||
SecretStorage==3.3.1
|
||||
setuptools==59.6.0
|
||||
six==1.16.0
|
||||
termcolor==1.1.0
|
||||
urllib3==1.26.5
|
||||
wadllib==1.3.6
|
||||
wheel==0.37.1
|
||||
zipp==1.0.0
|
@ -6,6 +6,9 @@ source /setup_export_logs.sh
|
||||
# fail on errors, verbose and export all env variables
|
||||
set -e -x -a
|
||||
|
||||
MAX_RUN_TIME=${MAX_RUN_TIME:-10800}
|
||||
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME))
|
||||
|
||||
# Choose random timezone for this test run.
|
||||
#
|
||||
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
|
||||
@ -262,14 +265,17 @@ function run_tests()
|
||||
|
||||
export -f run_tests
|
||||
|
||||
|
||||
# This should be enough to setup job and collect artifacts
|
||||
TIMEOUT=$((MAX_RUN_TIME - 300))
|
||||
if [ "$NUM_TRIES" -gt "1" ]; then
|
||||
# We don't run tests with Ordinary database in PRs, only in master.
|
||||
# So run new/changed tests with Ordinary at least once in flaky check.
|
||||
timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
|
||||
timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
|
||||
| sed 's/All tests have finished//' | sed 's/No tests were run//' ||:
|
||||
fi
|
||||
|
||||
timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||:
|
||||
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
|
||||
|
||||
echo "Files in current directory"
|
||||
ls -la ./
|
||||
|
@ -38,7 +38,7 @@ function fn_exists() {
|
||||
function timeout_with_logging() {
|
||||
local exit_code=0
|
||||
|
||||
timeout "${@}" || exit_code="${?}"
|
||||
timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
|
||||
|
||||
if [[ "${exit_code}" -eq "124" ]]
|
||||
then
|
||||
|
@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
|
||||
|
||||
# python-magic is the same version as in Ubuntu 22.04
|
||||
RUN pip3 install \
|
||||
PyGithub \
|
||||
black==23.12.0 \
|
||||
boto3 \
|
||||
codespell==2.2.1 \
|
||||
mypy==1.8.0 \
|
||||
pylint==3.1.0 \
|
||||
python-magic==0.4.24 \
|
||||
flake8==4.0.1 \
|
||||
requests \
|
||||
thefuzz \
|
||||
tqdm==4.66.4 \
|
||||
types-requests \
|
||||
unidiff \
|
||||
jwt \
|
||||
&& rm -rf /root/.cache/pip
|
||||
COPY requirements.txt /
|
||||
RUN pip3 install --no-cache-dir -r requirements.txt
|
||||
|
||||
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
|
58
docker/test/style/requirements.txt
Normal file
58
docker/test/style/requirements.txt
Normal file
@ -0,0 +1,58 @@
|
||||
aiohttp==3.9.5
|
||||
aiosignal==1.3.1
|
||||
astroid==3.1.0
|
||||
async-timeout==4.0.3
|
||||
attrs==23.2.0
|
||||
black==23.12.0
|
||||
boto3==1.34.131
|
||||
botocore==1.34.131
|
||||
certifi==2024.6.2
|
||||
cffi==1.16.0
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
codespell==2.2.1
|
||||
cryptography==42.0.8
|
||||
Deprecated==1.2.14
|
||||
dill==0.3.8
|
||||
flake8==4.0.1
|
||||
frozenlist==1.4.1
|
||||
idna==3.7
|
||||
isort==5.13.2
|
||||
jmespath==1.0.1
|
||||
jwt==1.3.1
|
||||
mccabe==0.6.1
|
||||
multidict==6.0.5
|
||||
mypy==1.8.0
|
||||
mypy-extensions==1.0.0
|
||||
packaging==24.1
|
||||
pathspec==0.9.0
|
||||
pip==24.1.1
|
||||
pipdeptree==2.23.0
|
||||
platformdirs==4.2.2
|
||||
pycodestyle==2.8.0
|
||||
pycparser==2.22
|
||||
pyflakes==2.4.0
|
||||
PyGithub==2.3.0
|
||||
PyJWT==2.8.0
|
||||
pylint==3.1.0
|
||||
PyNaCl==1.5.0
|
||||
python-dateutil==2.9.0.post0
|
||||
python-magic==0.4.24
|
||||
PyYAML==6.0.1
|
||||
rapidfuzz==3.9.3
|
||||
requests==2.32.3
|
||||
s3transfer==0.10.1
|
||||
setuptools==59.6.0
|
||||
six==1.16.0
|
||||
thefuzz==0.22.1
|
||||
tomli==2.0.1
|
||||
tomlkit==0.12.5
|
||||
tqdm==4.66.4
|
||||
types-requests==2.32.0.20240622
|
||||
typing_extensions==4.12.2
|
||||
unidiff==0.7.5
|
||||
urllib3==2.2.2
|
||||
wheel==0.37.1
|
||||
wrapt==1.16.0
|
||||
yamllint==1.26.3
|
||||
yarl==1.9.4
|
26
docs/changelogs/v24.6.2.17-stable.md
Normal file
26
docs/changelogs/v24.6.2.17-stable.md
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable (dcced7c8478)
|
||||
|
||||
#### New Feature
|
||||
* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. [#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
|
@ -974,6 +974,13 @@ Default value: false
|
||||
|
||||
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
|
||||
|
||||
## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization}
|
||||
|
||||
Enables compact mode for binary serialization of discriminators in Variant data type.
|
||||
This mode allows to use significantly less memory for storing discriminators in parts when there is mostly one variant or a lot of NULL values.
|
||||
|
||||
Default value: true
|
||||
|
||||
## merge_workload
|
||||
|
||||
Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead.
|
||||
|
docs/en/operations/startup-scripts.md (new file, 30 lines)
@ -0,0 +1,30 @@
---
slug: /en/operations/startup-scripts
sidebar_label: Startup Scripts
---

# Startup Scripts

ClickHouse can run arbitrary SQL queries from the server configuration during startup. This can be useful for migrations or automatic schema creation.

```xml
<clickhouse>
    <startup_scripts>
        <scripts>
            <query>CREATE ROLE OR REPLACE test_role</query>
        </scripts>
        <scripts>
            <query>CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog</query>
            <condition>SELECT 1;</condition>
        </scripts>
    </startup_scripts>
</clickhouse>
```

ClickHouse executes all queries from the `startup_scripts` sequentially in the specified order. If any of the queries fail, the execution of the following queries won't be interrupted.

You can specify a conditional query in the config. In that case, the corresponding query executes only when the condition query returns the value `1` or `true`.

:::note
If the condition query returns any value other than `1` or `true`, the result will be interpreted as `false`, and the corresponding query won't be executed.
:::
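As a usage illustration only (not part of the documented feature), a minimal sketch that checks over the HTTP interface whether the `TestTable` startup script above took effect; the host, port, and database are assumptions:

```python
# Verify that a startup script created its table. Assumes the default HTTP port
# 8123 on localhost and the default database; adjust for your deployment.
from urllib.parse import quote
from urllib.request import urlopen

query = (
    "SELECT count() FROM system.tables "
    "WHERE database = currentDatabase() AND name = 'TestTable'"
)
with urlopen("http://localhost:8123/?query=" + quote(query)) as response:
    created = response.read().decode().strip() == "1"

print("startup script created TestTable:", created)
```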
@ -357,7 +357,7 @@ Number of currently running inserts to Kafka

Number of alive connections

### KeeperOutstandingRequets
### KeeperOutstandingRequests

Number of outstanding requests
@ -4,35 +4,56 @@ sidebar_position: 59
sidebar_label: clickhouse-disks
---

# clickhouse-disks
# Clickhouse-disks

A utility providing filesystem-like operations for ClickHouse disks.
A utility providing filesystem-like operations for ClickHouse disks. It can work in both interactive and non-interactive modes.

Program-wide options:
## Program-wide options

* `--config-file, -C` -- path to ClickHouse config, defaults to `/etc/clickhouse-server/config.xml`.
* `--save-logs` -- Log progress of invoked commands to `/var/log/clickhouse-server/clickhouse-disks.log`.
* `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`.
* `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`.
* `--query, -q` -- a single query that can be executed without launching interactive mode.
* `--help, -h` -- print all the options and commands with a description.

## Default Disks
After launch, two disks are initialized. The first one is the disk `local`, which imitates the local filesystem from which the clickhouse-disks utility was launched. The second one is the disk `default`, mounted to the local filesystem at the directory given by the `clickhouse/path` config parameter (default value is `/var/lib/clickhouse`).

## Clickhouse-disks state
For each disk that was added, the utility stores the current directory (as in a usual filesystem). The user can change the current directory and switch between disks.

The state is reflected in the prompt "`disk_name`:`path_name`".
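As an illustration of the non-interactive mode only (not part of this page's reference content), a minimal sketch that runs a single command through `--query` from Python; having the `clickhouse-disks` binary on `PATH` with the default config is an assumption:

```python
# Run one clickhouse-disks command non-interactively and capture its output.
# Pass --config-file explicitly if your server config lives elsewhere.
import subprocess

result = subprocess.run(
    ["clickhouse-disks", "--query", "list-disks"],
    capture_output=True,
    text=True,
    check=True,
)
print(result.stdout)
```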
## Commands
|
||||
|
||||
* `copy [--disk-from d1] [--disk-to d2] <FROM_PATH> <TO_PATH>`.
|
||||
Recursively copy data from `FROM_PATH` at disk `d1` (defaults to `disk` value if not provided)
|
||||
to `TO_PATH` at disk `d2` (defaults to `disk` value if not provided).
|
||||
* `move <FROM_PATH> <TO_PATH>`.
|
||||
Move file or directory from `FROM_PATH` to `TO_PATH`.
|
||||
* `remove <PATH>`.
|
||||
Remove `PATH` recursively.
|
||||
* `link <FROM_PATH> <TO_PATH>`.
|
||||
Create a hardlink from `FROM_PATH` to `TO_PATH`.
|
||||
* `list [--recursive] <PATH>...`
|
||||
List files at `PATH`s. Non-recursive by default.
|
||||
* `list-disks`.
|
||||
In these documentation file all mandatory positional arguments are referred as `<parameter>`, named arguments are referred as `[--parameter value]`. All positional parameters could be mentioned as a named parameter with a corresponding name.
|
||||
|
||||
* `cd (change-dir, change_dir) [--disk disk] <path>`
|
||||
Change directory to path `path` on disk `disk` (default value is a current disk). No disk switching happens.
|
||||
* `copy (cp) [--disk-from disk_1] [--disk-to disk_2] <path-from> <path-to>`.
|
||||
Recursively copy data from `path-from` at disk `disk_1` (default value is a current disk (parameter `disk` in a non-interactive mode))
|
||||
to `path-to` at disk `disk_2` (default value is a current disk (parameter `disk` in a non-interactive mode)).
|
||||
* `current_disk_with_path (current, current_disk, current_path)`
|
||||
Print current state in format:
|
||||
`Disk: "current_disk" Path: "current path on current disk"`
|
||||
* `help [<command>]`
|
||||
Print help message about command `command`. If `command` is not specified print information about all commands.
|
||||
* `move (mv) <path-from> <path-to>`.
|
||||
Move file or directory from `path-from` to `path-to` within current disk.
|
||||
* `remove (rm, delete) <path>`.
|
||||
Remove `path` recursively on a current disk.
|
||||
* `link (ln) <path-from> <path-to>`.
|
||||
Create a hardlink from `path-from` to `path-to` on a current disk.
|
||||
* `list (ls) [--recursive] <path>`
|
||||
List files at `path`s on a current disk. Non-recursive by default.
|
||||
* `list-disks (list_disks, ls-disks, ls_disks)`.
|
||||
List disks names.
|
||||
* `mkdir [--recursive] <PATH>`.
|
||||
* `mkdir [--recursive] <path>` on a current disk.
|
||||
Create a directory. Non-recursive by default.
|
||||
* `read: <FROM_PATH> [<TO_PATH>]`
|
||||
Read a file from `FROM_PATH` to `TO_PATH` (`stdout` if not supplied).
|
||||
* `write [FROM_PATH] <TO_PATH>`.
|
||||
Write a file from `FROM_PATH` (`stdin` if not supplied) to `TO_PATH`.
|
||||
* `read (r) <path-from> [--path-to path]`
|
||||
Read a file from `path-from` to `path` (`stdout` if not supplied).
|
||||
* `switch-disk [--path path] <disk>`
|
||||
Switch to disk `disk` on path `path` (if `path` is not specified default value is a previous path on disk `disk`).
|
||||
* `write (w) [--path-from path] <path-to>`.
|
||||
Write a file from `path` (`stdin` if `path` is not supplied, input must finish by Ctrl+D) to `path-to`.
|
||||
|
@ -0,0 +1,37 @@
---
slug: /en/sql-reference/aggregate-functions/reference/aggthrow
sidebar_position: 101
---

# aggThrow

This function can be used to test exception safety. It will throw an exception on creation with the specified probability.

**Syntax**

```sql
aggThrow(throw_prob)
```

**Arguments**

- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md).

**Returned value**

- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`.

**Example**

Query:

```sql
SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even;
```

Result:

```response
Received exception:
Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW)
```
@ -43,6 +43,7 @@ Standard aggregate functions:

ClickHouse-specific aggregate functions:

- [aggThrow](../reference/aggthrow.md)
- [analysisOfVariance](../reference/analysis_of_variance.md)
- [any](../reference/any_respect_nulls.md)
- [anyHeavy](../reference/anyheavy.md)
@ -5,23 +5,45 @@ sidebar_position: 165
|
||||
|
||||
# maxMap
|
||||
|
||||
Syntax: `maxMap(key, value)` or `maxMap(Tuple(key, value))`
|
||||
|
||||
Calculates the maximum from `value` array according to the keys specified in the `key` array.
|
||||
|
||||
Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
|
||||
**Syntax**
|
||||
|
||||
The number of elements in `key` and `value` must be the same for each row that is totaled.
|
||||
```sql
|
||||
maxMap(key, value)
|
||||
```
|
||||
or
|
||||
```sql
|
||||
maxMap(Tuple(key, value))
|
||||
```
|
||||
|
||||
Returns a tuple of two arrays: keys and values calculated for the corresponding keys.
|
||||
Alias: `maxMappedArrays`
|
||||
|
||||
Example:
|
||||
:::note
|
||||
- Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
|
||||
- The number of elements in `key` and `value` must be the same for each row that is totaled.
|
||||
:::
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `key` — Array of keys. [Array](../../data-types/array.md).
|
||||
- `value` — Array of values. [Array](../../data-types/array.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT maxMap(a, b)
|
||||
FROM values('a Array(Char), b Array(Int64)', (['x', 'y'], [2, 2]), (['y', 'z'], [3, 1]))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─maxMap(a, b)───────────┐
|
||||
│ [['x','y','z'],[2,3,1]]│
|
||||
|
@ -5,23 +5,45 @@ sidebar_position: 169

# minMap

Syntax: `minMap(key, value)` or `minMap(Tuple(key, value))`

Calculates the minimum from `value` array according to the keys specified in the `key` array.

Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
**Syntax**

The number of elements in `key` and `value` must be the same for each row that is totaled.
```sql
minMap(key, value)
```
or
```sql
minMap(Tuple(key, value))
```

Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys.
Alias: `minMappedArrays`

Example:
:::note
- Passing a tuple of keys and value arrays is identical to passing an array of keys and an array of values.
- The number of elements in `key` and `value` must be the same for each row that is totaled.
:::

**Parameters**

- `key` — Array of keys. [Array](../../data-types/array.md).
- `value` — Array of values. [Array](../../data-types/array.md).

**Returned value**

- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)).

**Example**

Query:

``` sql
SELECT minMap(a, b)
FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1]))
```

Result:

``` text
┌─minMap(a, b)──────┐
│ ([1,2,3],[2,1,1]) │
@ -83,7 +83,57 @@ Result:
|
||||
```
|
||||
## makeDate32
|
||||
|
||||
Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md).
|
||||
Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, month, day (or optionally a year and a day).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
makeDate32(year, [month,] day)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `month` — Month (optional). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
|
||||
:::note
|
||||
If `month` is omitted then `day` should take a value between `1` and `365`, otherwise it should take a value between `1` and `31`.
|
||||
:::
|
||||
|
||||
**Returned values**
|
||||
|
||||
- A date created from the arguments. [Date32](../../sql-reference/data-types/date32.md).
|
||||
|
||||
**Examples**
|
||||
|
||||
Create a date from a year, month, and day:
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT makeDate32(2024, 1, 1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
2024-01-01
|
||||
```
|
||||
|
||||
Create a Date from a year and day of year:
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT makeDate32(2024, 100);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
2024-04-09
|
||||
```
|
||||
|
||||
## makeDateTime
|
||||
|
||||
@ -125,12 +175,38 @@ Result:
|
||||
|
||||
## makeDateTime64
|
||||
|
||||
Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md).
|
||||
Creates a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components: year, month, day, hour, minute, second. With optional sub-second precision.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
makeDateTime64(year, month, day, hour, minute, second[, precision])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `year` — Year (0-9999). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `precision` — Optional precision of the sub-second component (0-9). [Integer](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
|
||||
SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5);
|
||||
```
|
||||
|
||||
```response
|
||||
┌─makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5)─┐
|
||||
│ 2023-05-15 10:30:45.00779 │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## timestamp
|
||||
|
@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server.
|
||||
fqdn();
|
||||
```
|
||||
|
||||
This function is case-insensitive.
|
||||
Aliases: `fullHostName`, 'FQDN'.
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -1555,7 +1555,7 @@ The result type is UInt64.
|
||||
|
||||
## normalizeQuery
|
||||
|
||||
Replaces literals, sequences of literals and complex aliases with placeholders.
|
||||
Replaces literals, sequences of literals and complex aliases (containing whitespace, more than two digits or at least 36 bytes long such as UUIDs) with placeholder `?`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1573,6 +1573,8 @@ normalizeQuery(x)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT normalizeQuery('[1, 2, 3, x]') AS query;
|
||||
```
|
||||
@ -1585,9 +1587,44 @@ Result:
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
## normalizeQueryKeepNames
|
||||
|
||||
Replaces literals, sequences of literals with placeholder `?` but does not replace complex aliases (containing whitespace, more than two digits
|
||||
or at least 36 bytes long such as UUIDs). This helps better analyze complex query logs.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
normalizeQueryKeepNames(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — Sequence of characters. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Sequence of characters with placeholders. [String](../data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT normalizeQuery('SELECT 1 AS aComplexName123'), normalizeQueryKeepNames('SELECT 1 AS aComplexName123');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─normalizeQuery('SELECT 1 AS aComplexName123')─┬─normalizeQueryKeepNames('SELECT 1 AS aComplexName123')─┐
|
||||
│ SELECT ? AS `?` │ SELECT ? AS aComplexName123 │
|
||||
└───────────────────────────────────────────────┴────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## normalizedQueryHash
|
||||
|
||||
Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query log.
|
||||
Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query logs.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1605,6 +1642,8 @@ normalizedQueryHash(x)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res;
|
||||
```
|
||||
@ -1617,6 +1656,43 @@ Result:
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## normalizedQueryHashKeepNames
|
||||
|
||||
Like [normalizedQueryHash](#normalizedqueryhash) it returns identical 64bit hash values without the values of literals for similar queries but it does not replace complex aliases (containing whitespace, more than two digits
|
||||
or at least 36 bytes long such as UUIDs) with a placeholder before hashing. Can be helpful to analyze query logs.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
normalizedQueryHashKeepNames(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — Sequence of characters. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Hash value. [UInt64](../data-types/int-uint.md#uint-ranges).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT normalizedQueryHash('SELECT 1 AS `xyz123`') != normalizedQueryHash('SELECT 1 AS `abc123`') AS normalizedQueryHash;
|
||||
SELECT normalizedQueryHashKeepNames('SELECT 1 AS `xyz123`') != normalizedQueryHashKeepNames('SELECT 1 AS `abc123`') AS normalizedQueryHashKeepNames;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─normalizedQueryHash─┐
|
||||
│ 0 │
|
||||
└─────────────────────┘
|
||||
┌─normalizedQueryHashKeepNames─┐
|
||||
│ 1 │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
## normalizeUTF8NFC
|
||||
|
||||
Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text.
|
||||
|
@ -6,44 +6,122 @@ sidebar_label: Time Window
|
||||
|
||||
# Time Window Functions
|
||||
|
||||
Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below:
|
||||
Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with [WindowView](../statements/create/view.md/#window-view-experimental) are listed below:
|
||||
|
||||
## tumble
|
||||
|
||||
A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
tumble(time_attr, interval [, timezone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type.
|
||||
- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type.
|
||||
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
|
||||
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`.
|
||||
- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT tumble(now(), toIntervalDay('1'))
|
||||
SELECT tumble(now(), toIntervalDay('1'));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─tumble(now(), toIntervalDay('1'))─────────────┐
|
||||
│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │
|
||||
│ ('2024-07-04 00:00:00','2024-07-05 00:00:00') │
|
||||
└───────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## tumbleStart
|
||||
|
||||
Returns the inclusive lower bound of the corresponding [tumbling window](#tumble).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
tumbleStart(time_attr, interval [, timezone]);
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
|
||||
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
|
||||
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).
|
||||
|
||||
The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT tumbleStart(now(), toIntervalDay('1'));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─tumbleStart(now(), toIntervalDay('1'))─┐
|
||||
│ 2024-07-04 00:00:00 │
|
||||
└────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## tumbleEnd

Returns the exclusive upper bound of the corresponding [tumbling window](#tumble).

**Syntax**

``` sql
tumbleEnd(time_attr, interval [, timezone]);
```

**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).

**Returned values**

- The exclusive upper bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).

**Example**

Query:

```sql
SELECT tumbleEnd(now(), toIntervalDay('1'));
```

Result:

```response
┌─tumbleEnd(now(), toIntervalDay('1'))─┐
│ 2024-07-05 00:00:00 │
└──────────────────────────────────────┘
```
## hop

A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows.

``` sql
hop(time_attr, hop_interval, window_interval [, timezone])
```

@ -51,65 +129,118 @@ hop(time_attr, hop_interval, window_interval [, timezone])

**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

**Returned values**

- The inclusive lower and exclusive upper bound of the corresponding hopping window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)).

:::note
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
:::

**Example**

Query:

``` sql
SELECT hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
```

Result:

``` text
┌─hop(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
│ ('2024-07-03 00:00:00','2024-07-05 00:00:00') │
└────────────────────────────────────────────────────┘
```
## hopStart

Returns the inclusive lower bound of the corresponding [hopping window](#hop).

**Syntax**

``` sql
hopStart(time_attr, hop_interval, window_interval [, timezone]);
```

**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).

**Returned values**

- The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).

:::note
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
:::

**Example**

Query:

``` sql
SELECT hopStart(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
```

Result:

``` text
┌─hopStart(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
│ 2024-07-03 00:00:00 │
└─────────────────────────────────────────────────────────┘
```
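The tuple form works for the hop family as well; note that outside of `WINDOW VIEW` the tuple produced by `hop` describes only the first overlapping window. A hedged sketch:

```sql
-- Passing the bounds tuple returned by hop() straight into hopStart().
SELECT hopStart(hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY));
```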
## hopEnd

Returns the exclusive upper bound of the corresponding [hopping window](#hop).

**Syntax**

``` sql
hopEnd(time_attr, hop_interval, window_interval [, timezone]);
```

**Arguments**

- `time_attr` — Date and time. [DateTime](../data-types/datetime.md).
- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md).
- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional).

The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md).

**Returned values**

- The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md).

:::note
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`.
:::

**Example**

Query:

``` sql
SELECT hopEnd(now(), INTERVAL '1' DAY, INTERVAL '2' DAY);
```

Result:

``` text
┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐
│ 2024-07-05 00:00:00 │
└───────────────────────────────────────────────────────┘
```
## Related content
@ -600,7 +600,7 @@ mapApply(func, map)

**Arguments**

- `func` — [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../data-types/map.md).

**Returned value**
@ -831,7 +831,39 @@ SELECT mapSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
└──────────────────────────────┘
```

For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function.

## mapPartialSort

Sorts the elements of a map in ascending order with an additional `limit` argument allowing partial sorting.
If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map.

**Syntax**

```sql
mapPartialSort([func,] limit, map)
```

**Arguments**

- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md).
- `map` – Map to sort. [Map](../data-types/map.md).

**Returned value**

- Partially sorted map. [Map](../data-types/map.md).

**Example**

``` sql
SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2));
```

``` text
┌─mapPartialSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐
│ {'k2':1,'k3':2,'k1':3} │
└───────────────────────────────────────────────────────────────────────────┘
```
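When no lambda is given, `mapPartialSort` is expected to order by key, mirroring `mapSort`; this is a hedged sketch rather than a guaranteed result, and elements beyond `limit` may appear in any order:

```sql
-- Assumed behaviour: sort by key, with only the first 2 positions guaranteed to be ordered.
SELECT mapPartialSort(2, map('k3', 1, 'k2', 2, 'k1', 3));
```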
## mapReverseSort(\[func,\], map)
|
||||
|
||||
@ -861,3 +893,35 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
```

For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort).
## mapPartialReverseSort

Sorts the elements of a map in descending order with an additional `limit` argument allowing partial sorting.
If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map.

**Syntax**

```sql
mapPartialReverseSort([func,] limit, map)
```

**Arguments**

- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md).
- `map` – Map to sort. [Map](../data-types/map.md).

**Returned value**

- Partially sorted map. [Map](../data-types/map.md).

**Example**

``` sql
SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2));
```

``` text
┌─mapPartialReverseSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐
│ {'k1':3,'k3':2,'k2':1} │
└──────────────────────────────────────────────────────────────────────────────────┘
```
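The lambda can equally rank by key instead of value; as with `arrayPartialReverseSort`, positions past `limit` should be treated as unspecified. A hedged sketch:

```sql
-- Reverse-sort by key; only the first 2 positions are guaranteed to be in descending key order.
SELECT mapPartialReverseSort((k, v) -> k, 2, map('k1', 3, 'k2', 1, 'k3', 2));
```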
@ -1,6 +1,8 @@
set (CLICKHOUSE_DISKS_SOURCES
    DisksApp.cpp
    DisksClient.cpp
    ICommand.cpp
    CommandChangeDirectory.cpp
    CommandCopy.cpp
    CommandLink.cpp
    CommandList.cpp
@ -9,10 +11,14 @@ set (CLICKHOUSE_DISKS_SOURCES
    CommandMove.cpp
    CommandRead.cpp
    CommandRemove.cpp
    CommandSwitchDisk.cpp
    CommandWrite.cpp
    CommandHelp.cpp
    CommandTouch.cpp
    CommandGetCurrentDiskAndPath.cpp)

if (CLICKHOUSE_CLOUD)
    set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp)
endif ()

set (CLICKHOUSE_DISKS_LINK
programs/disks/CommandChangeDirectory.cpp (new file, 35 lines)
@ -0,0 +1,35 @@
#include <Interpreters/Context.h>
#include <Common/TerminalSize.h>
#include "DisksApp.h"
#include "DisksClient.h"
#include "ICommand.h"

namespace DB
{

class CommandChangeDirectory final : public ICommand
{
public:
    explicit CommandChangeDirectory() : ICommand()
    {
        command_name = "cd";
        description = "Change directory (makes sense only in interactive mode)";
        options_description.add_options()("path", po::value<String>(), "the path to which we want to change (mandatory, positional)")(
            "disk", po::value<String>(), "A disk where the path is changed (without disk switching)");
        positional_options_description.add("path", 1);
    }

    void executeImpl(const CommandLineOptions & options, DisksClient & client) override
    {
        DiskWithPath & disk = getDiskWithPath(client, options, "disk");
        String path = getValueFromCommandLineOptionsThrow<String>(options, "path");
        disk.setPath(path);
    }
};

CommandPtr makeCommandChangeDirectory()
{
    return std::make_shared<DB::CommandChangeDirectory>();
}

}
@ -1,6 +1,8 @@
|
||||
#include "ICommand.h"
|
||||
#include <Interpreters/Context.h>
|
||||
#include "Common/Exception.h"
|
||||
#include <Common/TerminalSize.h>
|
||||
#include "DisksClient.h"
|
||||
#include "ICommand.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -10,59 +12,89 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
class CommandCopy final : public ICommand
|
||||
{
|
||||
public:
|
||||
CommandCopy()
|
||||
explicit CommandCopy() : ICommand()
|
||||
{
|
||||
command_name = "copy";
|
||||
command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth()));
|
||||
description = "Recursively copy data from `FROM_PATH` to `TO_PATH`";
|
||||
usage = "copy [OPTION]... <FROM_PATH> <TO_PATH>";
|
||||
command_option_description->add_options()
|
||||
("disk-from", po::value<String>(), "disk from which we copy")
|
||||
("disk-to", po::value<String>(), "disk to which we copy");
|
||||
description = "Recursively copy data from `path-from` to `path-to`";
|
||||
options_description.add_options()(
|
||||
"disk-from", po::value<String>(), "disk from which we copy is executed (default value is a current disk)")(
|
||||
"disk-to", po::value<String>(), "disk to which copy is executed (default value is a current disk)")(
|
||||
"path-from", po::value<String>(), "path from which copy is executed (mandatory, positional)")(
|
||||
"path-to", po::value<String>(), "path to which copy is executed (mandatory, positional)")(
|
||||
"recursive,r", "recursively copy the directory (required to remove a directory)");
|
||||
positional_options_description.add("path-from", 1);
|
||||
positional_options_description.add("path-to", 1);
|
||||
}
|
||||
|
||||
void processOptions(
|
||||
Poco::Util::LayeredConfiguration & config,
|
||||
po::variables_map & options) const override
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
if (options.count("disk-from"))
|
||||
config.setString("disk-from", options["disk-from"].as<String>());
|
||||
if (options.count("disk-to"))
|
||||
config.setString("disk-to", options["disk-to"].as<String>());
|
||||
}
|
||||
auto disk_from = getDiskWithPath(client, options, "disk-from");
|
||||
auto disk_to = getDiskWithPath(client, options, "disk-to");
|
||||
String path_from = disk_from.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-from"));
|
||||
String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-to"));
|
||||
bool recursive = options.count("recursive");
|
||||
|
||||
void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> & disk_selector,
|
||||
Poco::Util::LayeredConfiguration & config) override
|
||||
{
|
||||
if (command_arguments.size() != 2)
|
||||
if (!disk_from.getDisk()->exists(path_from))
|
||||
{
|
||||
printHelpMessage();
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"cannot stat '{}' on disk '{}': No such file or directory",
|
||||
path_from,
|
||||
disk_from.getDisk()->getName());
|
||||
}
|
||||
else if (disk_from.getDisk()->isFile(path_from))
|
||||
{
|
||||
auto target_location = getTargetLocation(path_from, disk_to, path_to);
|
||||
if (!disk_to.getDisk()->exists(target_location) || disk_to.getDisk()->isFile(target_location))
|
||||
{
|
||||
disk_from.getDisk()->copyFile(
|
||||
path_from,
|
||||
*disk_to.getDisk(),
|
||||
target_location,
|
||||
/* read_settings= */ {},
|
||||
/* write_settings= */ {},
|
||||
/* cancellation_hook= */ {});
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "cannot overwrite directory {} with non-directory {}", target_location, path_from);
|
||||
}
|
||||
}
|
||||
else if (disk_from.getDisk()->isDirectory(path_from))
|
||||
{
|
||||
if (!recursive)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "--recursive not specified; omitting directory {}", path_from);
|
||||
}
|
||||
auto target_location = getTargetLocation(path_from, disk_to, path_to);
|
||||
|
||||
String disk_name_from = config.getString("disk-from", config.getString("disk", "default"));
|
||||
String disk_name_to = config.getString("disk-to", config.getString("disk", "default"));
|
||||
|
||||
const String & path_from = command_arguments[0];
|
||||
const String & path_to = command_arguments[1];
|
||||
|
||||
DiskPtr disk_from = disk_selector->get(disk_name_from);
|
||||
DiskPtr disk_to = disk_selector->get(disk_name_to);
|
||||
|
||||
String relative_path_from = validatePathAndGetAsRelative(path_from);
|
||||
String relative_path_to = validatePathAndGetAsRelative(path_to);
|
||||
|
||||
disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* read_settings= */ {}, /* write_settings= */ {}, /* cancellation_hook= */ {});
|
||||
if (disk_to.getDisk()->isFile(target_location))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory {} with directory {}", path_to, target_location);
|
||||
}
|
||||
else if (!disk_to.getDisk()->exists(target_location))
|
||||
{
|
||||
disk_to.getDisk()->createDirectory(target_location);
|
||||
}
|
||||
disk_from.getDisk()->copyDirectoryContent(
|
||||
path_from,
|
||||
disk_to.getDisk(),
|
||||
target_location,
|
||||
/* read_settings= */ {},
|
||||
/* write_settings= */ {},
|
||||
/* cancellation_hook= */ {});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
CommandPtr makeCommandCopy()
|
||||
{
|
||||
return std::make_shared<DB::CommandCopy>();
|
||||
}
|
||||
|
||||
std::unique_ptr <DB::ICommand> makeCommandCopy()
|
||||
{
|
||||
return std::make_unique<DB::CommandCopy>();
|
||||
}
|
||||
|
programs/disks/CommandGetCurrentDiskAndPath.cpp (new file, 30 lines)
@ -0,0 +1,30 @@
#include <Interpreters/Context.h>
#include <Common/TerminalSize.h>
#include "DisksApp.h"
#include "DisksClient.h"
#include "ICommand.h"

namespace DB
{

class CommandGetCurrentDiskAndPath final : public ICommand
{
public:
    explicit CommandGetCurrentDiskAndPath() : ICommand()
    {
        command_name = "current_disk_with_path";
        description = "Prints current disk and path (which coincide with the prompt)";
    }

    void executeImpl(const CommandLineOptions &, DisksClient & client) override
    {
        auto disk = client.getCurrentDiskWithPath();
        std::cout << "Disk: " << disk.getDisk()->getName() << "\nPath: " << disk.getCurrentPath() << std::endl;
    }
};

CommandPtr makeCommandGetCurrentDiskAndPath()
{
    return std::make_shared<DB::CommandGetCurrentDiskAndPath>();
}
}
programs/disks/CommandHelp.cpp (new file, 43 lines)
@ -0,0 +1,43 @@
|
||||
#include "DisksApp.h"
|
||||
#include "ICommand.h"
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class CommandHelp final : public ICommand
|
||||
{
|
||||
public:
|
||||
explicit CommandHelp(const DisksApp & disks_app_) : disks_app(disks_app_)
|
||||
{
|
||||
command_name = "help";
|
||||
description = "Print help message about available commands";
|
||||
options_description.add_options()(
|
||||
"command", po::value<String>(), "A command to help with (optional, positional), if not specified, help lists all the commands");
|
||||
positional_options_description.add("command", 1);
|
||||
}
|
||||
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & /*client*/) override
|
||||
{
|
||||
std::optional<String> command = getValueFromCommandLineOptionsWithOptional<String>(options, "command");
|
||||
if (command.has_value())
|
||||
{
|
||||
disks_app.printCommandHelpMessage(command.value());
|
||||
}
|
||||
else
|
||||
{
|
||||
disks_app.printAvailableCommandsHelpMessage();
|
||||
}
|
||||
}
|
||||
|
||||
const DisksApp & disks_app;
|
||||
};
|
||||
|
||||
CommandPtr makeCommandHelp(const DisksApp & disks_app)
|
||||
{
|
||||
return std::make_shared<DB::CommandHelp>(disks_app);
|
||||
}
|
||||
|
||||
}
|
@ -1,14 +1,9 @@
|
||||
#include "ICommand.h"
|
||||
#include <Interpreters/Context.h>
|
||||
#include "ICommand.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
class CommandLink final : public ICommand
|
||||
{
|
||||
public:
|
||||
@ -16,42 +11,27 @@ public:
|
||||
{
|
||||
command_name = "link";
|
||||
description = "Create hardlink from `from_path` to `to_path`";
|
||||
usage = "link [OPTION]... <FROM_PATH> <TO_PATH>";
|
||||
options_description.add_options()(
|
||||
"path-from", po::value<String>(), "the path from which a hard link will be created (mandatory, positional)")(
|
||||
"path-to", po::value<String>(), "the path where a hard link will be created (mandatory, positional)");
|
||||
positional_options_description.add("path-from", 1);
|
||||
positional_options_description.add("path-to", 1);
|
||||
}
|
||||
|
||||
void processOptions(
|
||||
Poco::Util::LayeredConfiguration &,
|
||||
po::variables_map &) const override
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
}
|
||||
auto disk = client.getCurrentDiskWithPath();
|
||||
|
||||
void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> & disk_selector,
|
||||
Poco::Util::LayeredConfiguration & config) override
|
||||
{
|
||||
if (command_arguments.size() != 2)
|
||||
{
|
||||
printHelpMessage();
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
}
|
||||
const String & path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-from"));
|
||||
const String & path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-to"));
|
||||
|
||||
String disk_name = config.getString("disk", "default");
|
||||
|
||||
const String & path_from = command_arguments[0];
|
||||
const String & path_to = command_arguments[1];
|
||||
|
||||
DiskPtr disk = disk_selector->get(disk_name);
|
||||
|
||||
String relative_path_from = validatePathAndGetAsRelative(path_from);
|
||||
String relative_path_to = validatePathAndGetAsRelative(path_to);
|
||||
|
||||
disk->createHardLink(relative_path_from, relative_path_to);
|
||||
disk.getDisk()->createHardLink(path_from, path_to);
|
||||
}
|
||||
};
|
||||
|
||||
CommandPtr makeCommandLink()
|
||||
{
|
||||
return std::make_shared<DB::CommandLink>();
|
||||
}
|
||||
|
||||
std::unique_ptr <DB::ICommand> makeCommandLink()
|
||||
{
|
||||
return std::make_unique<DB::CommandLink>();
|
||||
}
|
||||
|
@ -1,98 +1,95 @@
|
||||
#include "ICommand.h"
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
#include "DisksApp.h"
|
||||
#include "DisksClient.h"
|
||||
#include "ICommand.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
class CommandList final : public ICommand
|
||||
{
|
||||
public:
|
||||
CommandList()
|
||||
explicit CommandList() : ICommand()
|
||||
{
|
||||
command_name = "list";
|
||||
command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth()));
|
||||
description = "List files at path[s]";
|
||||
usage = "list [OPTION]... <PATH>...";
|
||||
command_option_description->add_options()
|
||||
("recursive", "recursively list all directories");
|
||||
options_description.add_options()("recursive", "recursively list the directory")("all", "show hidden files")(
|
||||
"path", po::value<String>(), "the path of listing (mandatory, positional)");
|
||||
positional_options_description.add("path", 1);
|
||||
}
|
||||
|
||||
void processOptions(
|
||||
Poco::Util::LayeredConfiguration & config,
|
||||
po::variables_map & options) const override
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
if (options.count("recursive"))
|
||||
config.setBool("recursive", true);
|
||||
}
|
||||
|
||||
void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> & disk_selector,
|
||||
Poco::Util::LayeredConfiguration & config) override
|
||||
{
|
||||
if (command_arguments.size() != 1)
|
||||
{
|
||||
printHelpMessage();
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
}
|
||||
|
||||
String disk_name = config.getString("disk", "default");
|
||||
|
||||
const String & path = command_arguments[0];
|
||||
|
||||
DiskPtr disk = disk_selector->get(disk_name);
|
||||
|
||||
String relative_path = validatePathAndGetAsRelative(path);
|
||||
|
||||
bool recursive = config.getBool("recursive", false);
|
||||
bool recursive = options.count("recursive");
|
||||
bool show_hidden = options.count("all");
|
||||
auto disk = client.getCurrentDiskWithPath();
|
||||
String path = getValueFromCommandLineOptionsWithDefault<String>(options, "path", ".");
|
||||
|
||||
if (recursive)
|
||||
listRecursive(disk, relative_path);
|
||||
listRecursive(disk, path, show_hidden);
|
||||
else
|
||||
list(disk, relative_path);
|
||||
list(disk, path, show_hidden);
|
||||
}
|
||||
|
||||
private:
|
||||
static void list(const DiskPtr & disk, const std::string & relative_path)
|
||||
static void list(const DiskWithPath & disk, const std::string & path, bool show_hidden)
|
||||
{
|
||||
std::vector<String> file_names;
|
||||
disk->listFiles(relative_path, file_names);
|
||||
std::vector<String> file_names = disk.listAllFilesByPath(path);
|
||||
std::vector<String> selected_and_sorted_file_names{};
|
||||
|
||||
for (const auto & file_name : file_names)
|
||||
std::cout << file_name << '\n';
|
||||
if (show_hidden || (!file_name.starts_with('.')))
|
||||
selected_and_sorted_file_names.push_back(file_name);
|
||||
|
||||
std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end());
|
||||
for (const auto & file_name : selected_and_sorted_file_names)
|
||||
{
|
||||
std::cout << file_name << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
static void listRecursive(const DiskPtr & disk, const std::string & relative_path)
|
||||
static void listRecursive(const DiskWithPath & disk, const std::string & relative_path, bool show_hidden)
|
||||
{
|
||||
std::vector<String> file_names;
|
||||
disk->listFiles(relative_path, file_names);
|
||||
std::vector<String> file_names = disk.listAllFilesByPath(relative_path);
|
||||
std::vector<String> selected_and_sorted_file_names{};
|
||||
|
||||
std::cout << relative_path << ":\n";
|
||||
|
||||
if (!file_names.empty())
|
||||
{
|
||||
for (const auto & file_name : file_names)
|
||||
std::cout << file_name << '\n';
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
for (const auto & file_name : file_names)
|
||||
if (show_hidden || (!file_name.starts_with('.')))
|
||||
selected_and_sorted_file_names.push_back(file_name);
|
||||
|
||||
std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end());
|
||||
for (const auto & file_name : selected_and_sorted_file_names)
|
||||
{
|
||||
auto path = relative_path.empty() ? file_name : (relative_path + "/" + file_name);
|
||||
if (disk->isDirectory(path))
|
||||
listRecursive(disk, path);
|
||||
std::cout << file_name << "\n";
|
||||
}
|
||||
std::cout << "\n";
|
||||
|
||||
for (const auto & file_name : selected_and_sorted_file_names)
|
||||
{
|
||||
auto path = [&]() -> String
|
||||
{
|
||||
if (relative_path.ends_with("/"))
|
||||
{
|
||||
return relative_path + file_name;
|
||||
}
|
||||
else
|
||||
{
|
||||
return relative_path + "/" + file_name;
|
||||
}
|
||||
}();
|
||||
if (disk.isDirectory(path))
|
||||
{
|
||||
listRecursive(disk, path, show_hidden);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
std::unique_ptr <DB::ICommand> makeCommandList()
|
||||
CommandPtr makeCommandList()
|
||||
{
|
||||
return std::make_unique<DB::CommandList>();
|
||||
return std::make_shared<DB::CommandList>();
|
||||
}
|
||||
}
|
||||
|
@ -1,68 +1,40 @@
|
||||
#include "ICommand.h"
|
||||
#include <algorithm>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
#include "DisksClient.h"
|
||||
#include "ICommand.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
class CommandListDisks final : public ICommand
|
||||
{
|
||||
public:
|
||||
CommandListDisks()
|
||||
explicit CommandListDisks() : ICommand()
|
||||
{
|
||||
command_name = "list-disks";
|
||||
description = "List disks names";
|
||||
usage = "list-disks [OPTION]";
|
||||
description = "Lists all available disks";
|
||||
}
|
||||
|
||||
void processOptions(
|
||||
Poco::Util::LayeredConfiguration &,
|
||||
po::variables_map &) const override
|
||||
{}
|
||||
|
||||
void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> &,
|
||||
Poco::Util::LayeredConfiguration & config) override
|
||||
void executeImpl(const CommandLineOptions &, DisksClient & client) override
|
||||
{
|
||||
if (!command_arguments.empty())
|
||||
std::vector<String> sorted_and_selected{};
|
||||
for (const auto & disk_name : client.getAllDiskNames())
|
||||
{
|
||||
printHelpMessage();
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
sorted_and_selected.push_back(disk_name + ":" + client.getDiskWithPath(disk_name).getAbsolutePath(""));
|
||||
}
|
||||
|
||||
constexpr auto config_prefix = "storage_configuration.disks";
|
||||
constexpr auto default_disk_name = "default";
|
||||
|
||||
Poco::Util::AbstractConfiguration::Keys keys;
|
||||
config.keys(config_prefix, keys);
|
||||
|
||||
bool has_default_disk = false;
|
||||
|
||||
/// For the output to be ordered
|
||||
std::set<String> disks;
|
||||
|
||||
for (const auto & disk_name : keys)
|
||||
std::sort(sorted_and_selected.begin(), sorted_and_selected.end());
|
||||
for (const auto & disk_name : sorted_and_selected)
|
||||
{
|
||||
if (disk_name == default_disk_name)
|
||||
has_default_disk = true;
|
||||
disks.insert(disk_name);
|
||||
std::cout << disk_name << "\n";
|
||||
}
|
||||
|
||||
if (!has_default_disk)
|
||||
disks.insert(default_disk_name);
|
||||
|
||||
for (const auto & disk : disks)
|
||||
std::cout << disk << '\n';
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
std::unique_ptr <DB::ICommand> makeCommandListDisks()
|
||||
private:
|
||||
};
|
||||
|
||||
CommandPtr makeCommandListDisks()
|
||||
{
|
||||
return std::make_unique<DB::CommandListDisks>();
|
||||
return std::make_shared<DB::CommandListDisks>();
|
||||
}
|
||||
}
|
||||
|
@ -6,61 +6,35 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
class CommandMkDir final : public ICommand
|
||||
{
|
||||
public:
|
||||
CommandMkDir()
|
||||
{
|
||||
command_name = "mkdir";
|
||||
command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth()));
|
||||
description = "Create a directory";
|
||||
usage = "mkdir [OPTION]... <PATH>";
|
||||
command_option_description->add_options()
|
||||
("recursive", "recursively create directories");
|
||||
description = "Creates a directory";
|
||||
options_description.add_options()("parents", "recursively create directories")(
|
||||
"path", po::value<String>(), "the path on which directory should be created (mandatory, positional)");
|
||||
positional_options_description.add("path", 1);
|
||||
}
|
||||
|
||||
void processOptions(
|
||||
Poco::Util::LayeredConfiguration & config,
|
||||
po::variables_map & options) const override
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
if (options.count("recursive"))
|
||||
config.setBool("recursive", true);
|
||||
}
|
||||
bool recursive = options.count("parents");
|
||||
auto disk = client.getCurrentDiskWithPath();
|
||||
|
||||
void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> & disk_selector,
|
||||
Poco::Util::LayeredConfiguration & config) override
|
||||
{
|
||||
if (command_arguments.size() != 1)
|
||||
{
|
||||
printHelpMessage();
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
}
|
||||
|
||||
String disk_name = config.getString("disk", "default");
|
||||
|
||||
const String & path = command_arguments[0];
|
||||
|
||||
DiskPtr disk = disk_selector->get(disk_name);
|
||||
|
||||
String relative_path = validatePathAndGetAsRelative(path);
|
||||
bool recursive = config.getBool("recursive", false);
|
||||
String path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path"));
|
||||
|
||||
if (recursive)
|
||||
disk->createDirectories(relative_path);
|
||||
disk.getDisk()->createDirectories(path);
|
||||
else
|
||||
disk->createDirectory(relative_path);
|
||||
disk.getDisk()->createDirectory(path);
|
||||
}
|
||||
};
|
||||
|
||||
CommandPtr makeCommandMkDir()
|
||||
{
|
||||
return std::make_shared<DB::CommandMkDir>();
|
||||
}
|
||||
|
||||
std::unique_ptr <DB::ICommand> makeCommandMkDir()
|
||||
{
|
||||
return std::make_unique<DB::CommandMkDir>();
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
#include "ICommand.h"
|
||||
#include <Interpreters/Context.h>
|
||||
#include "ICommand.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -9,6 +9,7 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
class CommandMove final : public ICommand
|
||||
{
|
||||
public:
|
||||
@ -16,44 +17,62 @@ public:
|
||||
{
|
||||
command_name = "move";
|
||||
description = "Move file or directory from `from_path` to `to_path`";
|
||||
usage = "move [OPTION]... <FROM_PATH> <TO_PATH>";
|
||||
options_description.add_options()("path-from", po::value<String>(), "path from which we copy (mandatory, positional)")(
|
||||
"path-to", po::value<String>(), "path to which we copy (mandatory, positional)");
|
||||
positional_options_description.add("path-from", 1);
|
||||
positional_options_description.add("path-to", 1);
|
||||
}
|
||||
|
||||
void processOptions(
|
||||
Poco::Util::LayeredConfiguration &,
|
||||
po::variables_map &) const override
|
||||
{}
|
||||
|
||||
void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> & disk_selector,
|
||||
Poco::Util::LayeredConfiguration & config) override
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
if (command_arguments.size() != 2)
|
||||
auto disk = client.getCurrentDiskWithPath();
|
||||
|
||||
String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-from"));
|
||||
String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-to"));
|
||||
|
||||
if (disk.getDisk()->isFile(path_from))
|
||||
{
|
||||
printHelpMessage();
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
disk.getDisk()->moveFile(path_from, path_to);
|
||||
}
|
||||
else if (disk.getDisk()->isDirectory(path_from))
|
||||
{
|
||||
auto target_location = getTargetLocation(path_from, disk, path_to);
|
||||
if (!disk.getDisk()->exists(target_location))
|
||||
{
|
||||
disk.getDisk()->createDirectory(target_location);
|
||||
disk.getDisk()->moveDirectory(path_from, target_location);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (disk.getDisk()->isFile(target_location))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory '{}' with directory '{}'", target_location, path_from);
|
||||
}
|
||||
if (!disk.getDisk()->isDirectoryEmpty(target_location))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot move '{}' to '{}': Directory not empty", path_from, target_location);
|
||||
}
|
||||
else
|
||||
{
|
||||
disk.getDisk()->moveDirectory(path_from, target_location);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!disk.getDisk()->exists(path_from))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"cannot stat '{}' on disk: '{}': No such file or directory",
|
||||
path_from,
|
||||
disk.getDisk()->getName());
|
||||
}
|
||||
|
||||
String disk_name = config.getString("disk", "default");
|
||||
|
||||
const String & path_from = command_arguments[0];
|
||||
const String & path_to = command_arguments[1];
|
||||
|
||||
DiskPtr disk = disk_selector->get(disk_name);
|
||||
|
||||
String relative_path_from = validatePathAndGetAsRelative(path_from);
|
||||
String relative_path_to = validatePathAndGetAsRelative(path_to);
|
||||
|
||||
if (disk->isFile(relative_path_from))
|
||||
disk->moveFile(relative_path_from, relative_path_to);
|
||||
else
|
||||
disk->moveDirectory(relative_path_from, relative_path_to);
|
||||
}
|
||||
};
|
||||
|
||||
CommandPtr makeCommandMove()
|
||||
{
|
||||
return std::make_shared<DB::CommandMove>();
|
||||
}
|
||||
|
||||
std::unique_ptr <DB::ICommand> makeCommandMove()
|
||||
{
|
||||
return std::make_unique<DB::CommandMove>();
|
||||
}
|
||||
|
@ -1,78 +1,52 @@
|
||||
#include "ICommand.h"
|
||||
#include <Interpreters/Context.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/copyData.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
#include "ICommand.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
class CommandRead final : public ICommand
|
||||
{
|
||||
public:
|
||||
CommandRead()
|
||||
{
|
||||
command_name = "read";
|
||||
command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth()));
|
||||
description = "Read a file from `FROM_PATH` to `TO_PATH`";
|
||||
usage = "read [OPTION]... <FROM_PATH> [<TO_PATH>]";
|
||||
command_option_description->add_options()
|
||||
("output", po::value<String>(), "file to which we are reading, defaults to `stdout`");
|
||||
description = "Read a file from `path-from` to `path-to`";
|
||||
options_description.add_options()("path-from", po::value<String>(), "file from which we are reading (mandatory, positional)")(
|
||||
"path-to", po::value<String>(), "file to which we are writing, defaults to `stdout`");
|
||||
positional_options_description.add("path-from", 1);
|
||||
}
|
||||
|
||||
void processOptions(
|
||||
Poco::Util::LayeredConfiguration & config,
|
||||
po::variables_map & options) const override
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
if (options.count("output"))
|
||||
config.setString("output", options["output"].as<String>());
|
||||
}
|
||||
auto disk = client.getCurrentDiskWithPath();
|
||||
String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-from"));
|
||||
std::optional<String> path_to = getValueFromCommandLineOptionsWithOptional<String>(options, "path-to");
|
||||
|
||||
void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> & disk_selector,
|
||||
Poco::Util::LayeredConfiguration & config) override
|
||||
{
|
||||
if (command_arguments.size() != 1)
|
||||
auto in = disk.getDisk()->readFile(path_from);
|
||||
std::unique_ptr<WriteBufferFromFileBase> out = {};
|
||||
if (path_to.has_value())
|
||||
{
|
||||
printHelpMessage();
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
}
|
||||
|
||||
String disk_name = config.getString("disk", "default");
|
||||
|
||||
DiskPtr disk = disk_selector->get(disk_name);
|
||||
|
||||
String relative_path = validatePathAndGetAsRelative(command_arguments[0]);
|
||||
|
||||
String path_output = config.getString("output", "");
|
||||
|
||||
if (!path_output.empty())
|
||||
{
|
||||
String relative_path_output = validatePathAndGetAsRelative(path_output);
|
||||
|
||||
auto in = disk->readFile(relative_path);
|
||||
auto out = disk->writeFile(relative_path_output);
|
||||
String relative_path_to = disk.getRelativeFromRoot(path_to.value());
|
||||
out = disk.getDisk()->writeFile(relative_path_to);
|
||||
copyData(*in, *out);
|
||||
out->finalize();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto in = disk->readFile(relative_path);
|
||||
std::unique_ptr<WriteBufferFromFileBase> out = std::make_unique<WriteBufferFromFileDescriptor>(STDOUT_FILENO);
|
||||
out = std::make_unique<WriteBufferFromFileDescriptor>(STDOUT_FILENO);
|
||||
copyData(*in, *out);
|
||||
out->write('\n');
|
||||
}
|
||||
out->finalize();
|
||||
}
|
||||
};
|
||||
|
||||
CommandPtr makeCommandRead()
|
||||
{
|
||||
return std::make_shared<DB::CommandRead>();
|
||||
}
|
||||
|
||||
std::unique_ptr <DB::ICommand> makeCommandRead()
|
||||
{
|
||||
return std::make_unique<DB::CommandRead>();
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "ICommand.h"
|
||||
#include <Interpreters/Context.h>
|
||||
#include "Common/Exception.h"
|
||||
#include "ICommand.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -9,46 +10,49 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
class CommandRemove final : public ICommand
|
||||
{
|
||||
public:
|
||||
CommandRemove()
|
||||
{
|
||||
command_name = "remove";
|
||||
description = "Remove file or directory with all children. Throws exception if file doesn't exists.\nPath should be in format './' or './path' or 'path'";
|
||||
usage = "remove [OPTION]... <PATH>";
|
||||
description = "Remove file or directory. Throws exception if file doesn't exists";
|
||||
options_description.add_options()("path", po::value<String>(), "path that is going to be deleted (mandatory, positional)")(
|
||||
"recursive,r", "recursively removes the directory (required to remove a directory)");
|
||||
positional_options_description.add("path", 1);
|
||||
}
|
||||
|
||||
void processOptions(
|
||||
Poco::Util::LayeredConfiguration &,
|
||||
po::variables_map &) const override
|
||||
{}
|
||||
|
||||
void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> & disk_selector,
|
||||
Poco::Util::LayeredConfiguration & config) override
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
if (command_arguments.size() != 1)
|
||||
auto disk = client.getCurrentDiskWithPath();
|
||||
const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path"));
|
||||
bool recursive = options.count("recursive");
|
||||
if (!disk.getDisk()->exists(path))
|
||||
{
|
||||
printHelpMessage();
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName());
|
||||
}
|
||||
else if (disk.getDisk()->isDirectory(path))
|
||||
{
|
||||
if (!recursive)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot remove '{}': Is a directory", path);
|
||||
}
|
||||
else
|
||||
{
|
||||
disk.getDisk()->removeRecursive(path);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
disk.getDisk()->removeFileIfExists(path);
|
||||
}
|
||||
|
||||
String disk_name = config.getString("disk", "default");
|
||||
|
||||
const String & path = command_arguments[0];
|
||||
|
||||
DiskPtr disk = disk_selector->get(disk_name);
|
||||
|
||||
String relative_path = validatePathAndGetAsRelative(path);
|
||||
|
||||
disk->removeRecursive(relative_path);
|
||||
}
|
||||
};
|
||||
|
||||
CommandPtr makeCommandRemove()
|
||||
{
|
||||
return std::make_shared<DB::CommandRemove>();
|
||||
}
|
||||
|
||||
std::unique_ptr <DB::ICommand> makeCommandRemove()
|
||||
{
|
||||
return std::make_unique<DB::CommandRemove>();
|
||||
}
|
||||
|
programs/disks/CommandSwitchDisk.cpp (new file, 35 lines)
@ -0,0 +1,35 @@
|
||||
#include <optional>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
#include "DisksApp.h"
|
||||
#include "ICommand.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class CommandSwitchDisk final : public ICommand
|
||||
{
|
||||
public:
|
||||
explicit CommandSwitchDisk() : ICommand()
|
||||
{
|
||||
command_name = "switch-disk";
|
||||
description = "Switch disk (makes sense only in interactive mode)";
|
||||
options_description.add_options()("disk", po::value<String>(), "the disk to switch to (mandatory, positional)")(
|
||||
"path", po::value<String>(), "the path to switch on the disk");
|
||||
positional_options_description.add("disk", 1);
|
||||
}
|
||||
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
String disk = getValueFromCommandLineOptions<String>(options, "disk");
|
||||
std::optional<String> path = getValueFromCommandLineOptionsWithOptional<String>(options, "path");
|
||||
|
||||
client.switchToDisk(disk, path);
|
||||
}
|
||||
};
|
||||
|
||||
CommandPtr makeCommandSwitchDisk()
|
||||
{
|
||||
return std::make_shared<DB::CommandSwitchDisk>();
|
||||
}
|
||||
}
|
programs/disks/CommandTouch.cpp (new file, 34 lines)
@ -0,0 +1,34 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
#include "DisksApp.h"
|
||||
#include "DisksClient.h"
|
||||
#include "ICommand.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class CommandTouch final : public ICommand
|
||||
{
|
||||
public:
|
||||
explicit CommandTouch() : ICommand()
|
||||
{
|
||||
command_name = "touch";
|
||||
description = "Create a file by path";
|
||||
options_description.add_options()("path", po::value<String>(), "the path of listing (mandatory, positional)");
|
||||
positional_options_description.add("path", 1);
|
||||
}
|
||||
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
auto disk = client.getCurrentDiskWithPath();
|
||||
String path = getValueFromCommandLineOptionsThrow<String>(options, "path");
|
||||
|
||||
disk.getDisk()->createFile(disk.getRelativeFromRoot(path));
|
||||
}
|
||||
};
|
||||
|
||||
CommandPtr makeCommandTouch()
|
||||
{
|
||||
return std::make_shared<DB::CommandTouch>();
|
||||
}
|
||||
}
|
@ -1,79 +1,57 @@
|
||||
#include "ICommand.h"
|
||||
#include <Interpreters/Context.h>
|
||||
#include "ICommand.h"
|
||||
|
||||
#include <Common/TerminalSize.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/copyData.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
class CommandWrite final : public ICommand
|
||||
{
|
||||
public:
|
||||
CommandWrite()
|
||||
{
|
||||
command_name = "write";
|
||||
command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth()));
|
||||
description = "Write a file from `FROM_PATH` to `TO_PATH`";
|
||||
usage = "write [OPTION]... [<FROM_PATH>] <TO_PATH>";
|
||||
command_option_description->add_options()
|
||||
("input", po::value<String>(), "file from which we are reading, defaults to `stdin`");
|
||||
description = "Write a file from `path-from` to `path-to`";
|
||||
options_description.add_options()("path-from", po::value<String>(), "file from which we are reading, defaults to `stdin` (input from `stdin` is finished by Ctrl+D)")(
|
||||
"path-to", po::value<String>(), "file to which we are writing (mandatory, positional)");
|
||||
positional_options_description.add("path-to", 1);
|
||||
}
|
||||
|
||||
void processOptions(
|
||||
Poco::Util::LayeredConfiguration & config,
|
||||
po::variables_map & options) const override
|
||||
|
||||
void executeImpl(const CommandLineOptions & options, DisksClient & client) override
|
||||
{
|
||||
if (options.count("input"))
|
||||
config.setString("input", options["input"].as<String>());
|
||||
}
|
||||
auto disk = client.getCurrentDiskWithPath();
|
||||
|
||||
void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> & disk_selector,
|
||||
Poco::Util::LayeredConfiguration & config) override
|
||||
{
|
||||
if (command_arguments.size() != 1)
|
||||
std::optional<String> path_from = getValueFromCommandLineOptionsWithOptional<String>(options, "path-from");
|
||||
|
||||
String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-to"));
|
||||
|
||||
auto in = [&]() -> std::unique_ptr<ReadBufferFromFileBase>
|
||||
{
|
||||
printHelpMessage();
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
}
|
||||
if (!path_from.has_value())
|
||||
{
|
||||
return std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
|
||||
}
|
||||
else
|
||||
{
|
||||
String relative_path_from = disk.getRelativeFromRoot(path_from.value());
|
||||
return disk.getDisk()->readFile(relative_path_from);
|
||||
}
|
||||
}();
|
||||
|
||||
String disk_name = config.getString("disk", "default");
|
||||
|
||||
const String & path = command_arguments[0];
|
||||
|
||||
DiskPtr disk = disk_selector->get(disk_name);
|
||||
|
||||
String relative_path = validatePathAndGetAsRelative(path);
|
||||
|
||||
String path_input = config.getString("input", "");
|
||||
std::unique_ptr<ReadBufferFromFileBase> in;
|
||||
if (path_input.empty())
|
||||
{
|
||||
in = std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
|
||||
}
|
||||
else
|
||||
{
|
||||
String relative_path_input = validatePathAndGetAsRelative(path_input);
|
||||
in = disk->readFile(relative_path_input);
|
||||
}
|
||||
|
||||
auto out = disk->writeFile(relative_path);
|
||||
auto out = disk.getDisk()->writeFile(path_to);
|
||||
copyData(*in, *out);
|
||||
out->finalize();
|
||||
}
|
||||
};
|
||||
|
||||
CommandPtr makeCommandWrite()
|
||||
{
|
||||
return std::make_shared<DB::CommandWrite>();
|
||||
}
|
||||
|
||||
std::unique_ptr <DB::ICommand> makeCommandWrite()
|
||||
{
|
||||
return std::make_unique<DB::CommandWrite>();
|
||||
}
|
||||
|
@ -1,11 +1,22 @@
|
||||
#include "DisksApp.h"
|
||||
#include <Client/ClientBase.h>
|
||||
#include <Client/ReplxxLineReader.h>
|
||||
#include "Common/Exception.h"
|
||||
#include "Common/filesystemHelpers.h"
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
#include "DisksClient.h"
|
||||
#include "ICommand.h"
|
||||
#include "ICommand_fwd.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include <Disks/registerDisks.h>
|
||||
|
||||
#include <Common/TerminalSize.h>
|
||||
#include <Formats/registerFormats.h>
|
||||
|
||||
#include <Common/TerminalSize.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -13,74 +24,289 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
};
|
||||
|
||||
LineReader::Patterns DisksApp::query_extenders = {"\\"};
|
||||
LineReader::Patterns DisksApp::query_delimiters = {""};
|
||||
String DisksApp::word_break_characters = " \t\v\f\a\b\r\n";
|
||||
|
||||
CommandPtr DisksApp::getCommandByName(const String & command) const
|
||||
{
|
||||
try
|
||||
{
|
||||
if (auto it = aliases.find(command); it != aliases.end())
|
||||
return command_descriptions.at(it->second);
|
||||
|
||||
return command_descriptions.at(command);
|
||||
}
|
||||
catch (std::out_of_range &)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The command `{}` is unknown", command);
|
||||
}
|
||||
}
|
||||
|
||||
size_t DisksApp::findCommandPos(std::vector<String> & common_arguments)
|
||||
std::vector<String> DisksApp::getEmptyCompletion(String command_name) const
|
||||
{
|
||||
for (size_t i = 0; i < common_arguments.size(); i++)
|
||||
if (supported_commands.contains(common_arguments[i]))
|
||||
return i + 1;
|
||||
return common_arguments.size();
|
||||
auto command_ptr = command_descriptions.at(command_name);
|
||||
std::vector<String> answer{};
|
||||
if (multidisk_commands.contains(command_ptr->command_name))
|
||||
{
|
||||
answer = client->getAllFilesByPatternFromAllDisks("");
|
||||
}
|
||||
else
|
||||
{
|
||||
answer = client->getCurrentDiskWithPath().getAllFilesByPattern("");
|
||||
}
|
||||
for (const auto & disk_name : client->getAllDiskNames())
|
||||
{
|
||||
answer.push_back(disk_name);
|
||||
}
|
||||
for (const auto & option : command_ptr->options_description.options())
|
||||
{
|
||||
answer.push_back("--" + option->long_name());
|
||||
}
|
||||
if (command_name == "help")
|
||||
{
|
||||
for (const auto & [current_command_name, description] : command_descriptions)
|
||||
{
|
||||
answer.push_back(current_command_name);
|
||||
}
|
||||
}
|
||||
std::sort(answer.begin(), answer.end());
|
||||
return answer;
|
||||
}
|
||||
|
||||
void DisksApp::printHelpMessage(ProgramOptionsDescription & command_option_description)
|
||||
std::vector<String> DisksApp::getCommandsToComplete(const String & command_prefix) const
|
||||
{
|
||||
std::optional<ProgramOptionsDescription> help_description =
|
||||
createOptionsDescription("Help Message for clickhouse-disks", getTerminalWidth());
|
||||
|
||||
help_description->add(command_option_description);
|
||||
|
||||
std::cout << "ClickHouse disk management tool\n";
|
||||
std::cout << "Usage: ./clickhouse-disks [OPTION]\n";
|
||||
std::cout << "clickhouse-disks\n\n";
|
||||
|
||||
for (const auto & current_command : supported_commands)
|
||||
std::cout << command_descriptions[current_command]->command_name
|
||||
<< "\t"
|
||||
<< command_descriptions[current_command]->description
|
||||
<< "\n\n";
|
||||
|
||||
std::cout << command_option_description << '\n';
|
||||
std::vector<String> answer{};
|
||||
for (const auto & [word, _] : command_descriptions)
|
||||
{
|
||||
if (word.starts_with(command_prefix))
|
||||
{
|
||||
answer.push_back(word);
|
||||
}
|
||||
}
|
||||
if (!answer.empty())
|
||||
{
|
||||
std::sort(answer.begin(), answer.end());
|
||||
return answer;
|
||||
}
|
||||
for (const auto & [word, _] : aliases)
|
||||
{
|
||||
if (word.starts_with(command_prefix))
|
||||
{
|
||||
answer.push_back(word);
|
||||
}
|
||||
}
|
||||
if (!answer.empty())
|
||||
{
|
||||
std::sort(answer.begin(), answer.end());
|
||||
return answer;
|
||||
}
|
||||
return {command_prefix};
|
||||
}
|
||||
|
||||
String DisksApp::getDefaultConfigFileName()
|
||||
std::vector<String> DisksApp::getCompletions(const String & prefix) const
|
||||
{
|
||||
return "/etc/clickhouse-server/config.xml";
|
||||
auto arguments = po::split_unix(prefix, word_break_characters);
|
||||
if (arguments.empty())
|
||||
{
|
||||
return {};
|
||||
}
|
||||
if (word_break_characters.contains(prefix.back()))
|
||||
{
|
||||
CommandPtr command;
|
||||
try
|
||||
{
|
||||
command = getCommandByName(arguments[0]);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
return {arguments.back()};
|
||||
}
|
||||
return getEmptyCompletion(command->command_name);
|
||||
}
|
||||
else if (arguments.size() == 1)
|
||||
{
|
||||
String command_prefix = arguments[0];
|
||||
return getCommandsToComplete(command_prefix);
|
||||
}
|
||||
else
|
||||
{
|
||||
String last_token = arguments.back();
|
||||
CommandPtr command;
|
||||
try
|
||||
{
|
||||
command = getCommandByName(arguments[0]);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
return {last_token};
|
||||
}
|
||||
std::vector<String> answer = {};
|
||||
if (command->command_name == "help")
|
||||
{
|
||||
return getCommandsToComplete(last_token);
|
||||
}
|
||||
else
|
||||
{
|
||||
answer = [&]() -> std::vector<String>
|
||||
{
|
||||
if (multidisk_commands.contains(command->command_name))
|
||||
{
|
||||
return client->getAllFilesByPatternFromAllDisks(last_token);
|
||||
}
|
||||
else
|
||||
{
|
||||
return client->getCurrentDiskWithPath().getAllFilesByPattern(last_token);
|
||||
}
|
||||
}();
|
||||
|
||||
for (const auto & disk_name : client->getAllDiskNames())
|
||||
{
|
||||
if (disk_name.starts_with(last_token))
|
||||
{
|
||||
answer.push_back(disk_name);
|
||||
}
|
||||
}
|
||||
for (const auto & option : command->options_description.options())
|
||||
{
|
||||
String option_sign = "--" + option->long_name();
|
||||
if (option_sign.starts_with(last_token))
|
||||
{
|
||||
answer.push_back(option_sign);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!answer.empty())
|
||||
{
|
||||
std::sort(answer.begin(), answer.end());
|
||||
return answer;
|
||||
}
|
||||
else
|
||||
{
|
||||
return {last_token};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DisksApp::addOptions(
|
||||
ProgramOptionsDescription & options_description_,
|
||||
boost::program_options::positional_options_description & positional_options_description
|
||||
)
|
||||
bool DisksApp::processQueryText(const String & text)
|
||||
{
|
||||
options_description_.add_options()
|
||||
("help,h", "Print common help message")
|
||||
("config-file,C", po::value<String>(), "Set config file")
|
||||
("disk", po::value<String>(), "Set disk name")
|
||||
("command_name", po::value<String>(), "Name for command to do")
|
||||
("save-logs", "Save logs to a file")
|
||||
("log-level", po::value<String>(), "Logging level")
|
||||
;
|
||||
if (text.find_first_not_of(word_break_characters) == std::string::npos)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if (exit_strings.find(text) != exit_strings.end())
|
||||
return false;
|
||||
CommandPtr command;
|
||||
try
|
||||
{
|
||||
auto arguments = po::split_unix(text, word_break_characters);
|
||||
command = getCommandByName(arguments[0]);
|
||||
arguments.erase(arguments.begin());
|
||||
command->execute(arguments, *client);
|
||||
}
|
||||
catch (DB::Exception & err)
|
||||
{
|
||||
int code = getCurrentExceptionCode();
|
||||
if (code == ErrorCodes::LOGICAL_ERROR)
|
||||
{
|
||||
throw std::move(err);
|
||||
}
|
||||
else if (code == ErrorCodes::BAD_ARGUMENTS)
|
||||
{
|
||||
std::cerr << err.message() << "\n"
|
||||
<< "\n";
|
||||
if (command.get())
|
||||
{
|
||||
std::cerr << "COMMAND: " << command->command_name << "\n";
|
||||
std::cerr << command->options_description << "\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
printAvailableCommandsHelpMessage();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << err.message() << "\n";
|
||||
}
|
||||
}
|
||||
catch (std::exception & err)
|
||||
{
|
||||
std::cerr << err.what() << "\n";
|
||||
}
|
||||
|
||||
positional_options_description.add("command_name", 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
supported_commands = {"list-disks", "list", "move", "remove", "link", "copy", "write", "read", "mkdir"};
|
||||
#ifdef CLICKHOUSE_CLOUD
|
||||
supported_commands.insert("packed-io");
|
||||
#endif
|
||||
void DisksApp::runInteractiveReplxx()
|
||||
{
|
||||
ReplxxLineReader lr(
|
||||
suggest,
|
||||
history_file,
|
||||
/* multiline= */ false,
|
||||
query_extenders,
|
||||
query_delimiters,
|
||||
word_break_characters.c_str(),
|
||||
/* highlighter_= */ {});
|
||||
lr.enableBracketedPaste();
|
||||
|
||||
while (true)
|
||||
{
|
||||
DiskWithPath disk_with_path = client->getCurrentDiskWithPath();
|
||||
String prompt = "\x1b[1;34m" + disk_with_path.getDisk()->getName() + "\x1b[0m:" + "\x1b[1;31m" + disk_with_path.getCurrentPath()
|
||||
+ "\x1b[0m$ ";
|
||||
|
||||
auto input = lr.readLine(prompt, "\x1b[1;31m:-] \x1b[0m");
|
||||
if (input.empty())
|
||||
break;
|
||||
|
||||
if (!processQueryText(input))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void DisksApp::parseAndCheckOptions(
|
||||
const std::vector<String> & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options)
|
||||
{
|
||||
auto parser = po::command_line_parser(arguments).options(options_description).allow_unregistered();
|
||||
po::parsed_options parsed = parser.run();
|
||||
po::store(parsed, options);
|
||||
}
|
||||
|
||||
void DisksApp::addOptions()
|
||||
{
|
||||
options_description.add_options()("help,h", "Print common help message")("config-file,C", po::value<String>(), "Set config file")(
|
||||
"disk", po::value<String>(), "Set disk name")("save-logs", "Save logs to a file")(
|
||||
"log-level", po::value<String>(), "Logging level")("query,q", po::value<String>(), "Query for a non-interactive mode")(
|
||||
"test-mode", "Interactive interface in test regyme");
|
||||
|
||||
command_descriptions.emplace("list-disks", makeCommandListDisks());
|
||||
command_descriptions.emplace("copy", makeCommandCopy());
|
||||
command_descriptions.emplace("list", makeCommandList());
|
||||
command_descriptions.emplace("cd", makeCommandChangeDirectory());
|
||||
command_descriptions.emplace("move", makeCommandMove());
|
||||
command_descriptions.emplace("remove", makeCommandRemove());
|
||||
command_descriptions.emplace("link", makeCommandLink());
|
||||
command_descriptions.emplace("copy", makeCommandCopy());
|
||||
command_descriptions.emplace("write", makeCommandWrite());
|
||||
command_descriptions.emplace("read", makeCommandRead());
|
||||
command_descriptions.emplace("mkdir", makeCommandMkDir());
|
||||
command_descriptions.emplace("switch-disk", makeCommandSwitchDisk());
|
||||
command_descriptions.emplace("current_disk_with_path", makeCommandGetCurrentDiskAndPath());
|
||||
command_descriptions.emplace("touch", makeCommandTouch());
|
||||
command_descriptions.emplace("help", makeCommandHelp(*this));
|
||||
#ifdef CLICKHOUSE_CLOUD
|
||||
command_descriptions.emplace("packed-io", makeCommandPackedIO());
|
||||
#endif
|
||||
for (const auto & [command_name, command_ptr] : command_descriptions)
|
||||
{
|
||||
if (command_name != command_ptr->command_name)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Command name inside map doesn't coincide with actual command name");
|
||||
}
|
||||
}
|
||||
}
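For reference, a hedged example of the non-interactive invocation these registrations allow (the flags and the command name are the ones registered above; the binary name and the default config path come from this file, everything else is illustrative):

    clickhouse-disks --config-file /etc/clickhouse-server/config.xml --disk default --query 'list-disks'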
|
||||
|
||||
void DisksApp::processOptions()
|
||||
@ -93,76 +319,122 @@ void DisksApp::processOptions()
|
||||
config().setBool("save-logs", true);
|
||||
if (options.count("log-level"))
|
||||
config().setString("log-level", options["log-level"].as<String>());
|
||||
if (options.count("test-mode"))
|
||||
config().setBool("test-mode", true);
|
||||
if (options.count("query"))
|
||||
query = std::optional{options["query"].as<String>()};
|
||||
}
|
||||
|
||||
DisksApp::~DisksApp()
|
||||
|
||||
void DisksApp::printEntryHelpMessage() const
|
||||
{
|
||||
if (global_context)
|
||||
global_context->shutdown();
|
||||
std::cout << "\x1b[1;33m ClickHouse disk management tool \x1b[0m \n";
|
||||
std::cout << options_description << '\n';
|
||||
}
|
||||
|
||||
void DisksApp::init(std::vector<String> & common_arguments)
|
||||
|
||||
void DisksApp::printAvailableCommandsHelpMessage() const
|
||||
{
|
||||
stopOptionsProcessing();
|
||||
std::cout << "\x1b[1;32mAvailable commands:\x1b[0m\n";
|
||||
std::vector<std::pair<String, CommandPtr>> commands_with_aliases_and_descriptions{};
|
||||
size_t maximal_command_length = 0;
|
||||
for (const auto & [command_name, command_ptr] : command_descriptions)
|
||||
{
|
||||
std::string command_string = getCommandLineWithAliases(command_ptr);
|
||||
maximal_command_length = std::max(maximal_command_length, command_string.size());
|
||||
commands_with_aliases_and_descriptions.push_back({std::move(command_string), command_descriptions.at(command_name)});
|
||||
}
|
||||
for (const auto & [command_with_aliases, command_ptr] : commands_with_aliases_and_descriptions)
|
||||
{
|
||||
std::cout << "\x1b[1;33m" << command_with_aliases << "\x1b[0m" << std::string(5, ' ') << "\x1b[1;33m" << command_ptr->description
|
||||
<< "\x1b[0m \n";
|
||||
std::cout << command_ptr->options_description;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
ProgramOptionsDescription options_description{createOptionsDescription("clickhouse-disks", getTerminalWidth())};
|
||||
void DisksApp::printCommandHelpMessage(CommandPtr command) const
|
||||
{
|
||||
String command_name_with_aliases = getCommandLineWithAliases(command);
|
||||
std::cout << "\x1b[1;32m" << command_name_with_aliases << "\x1b[0m" << std::string(2, ' ') << command->description << "\n";
|
||||
std::cout << command->options_description;
|
||||
}
|
||||
|
||||
po::positional_options_description positional_options_description;
|
||||
void DisksApp::printCommandHelpMessage(String command_name) const
|
||||
{
|
||||
printCommandHelpMessage(getCommandByName(command_name));
|
||||
}
|
||||
|
||||
addOptions(options_description, positional_options_description);
|
||||
String DisksApp::getCommandLineWithAliases(CommandPtr command) const
|
||||
{
|
||||
String command_string = command->command_name;
|
||||
bool need_comma = false;
|
||||
for (const auto & [alias_name, alias_command_name] : aliases)
|
||||
{
|
||||
if (alias_command_name == command->command_name)
|
||||
{
|
||||
if (std::exchange(need_comma, true))
|
||||
command_string += ",";
|
||||
else
|
||||
command_string += "(";
|
||||
command_string += alias_name;
|
||||
}
|
||||
}
|
||||
command_string += (need_comma ? ")" : "");
|
||||
return command_string;
|
||||
}
|
||||
|
||||
size_t command_pos = findCommandPos(common_arguments);
|
||||
std::vector<String> global_flags(command_pos);
|
||||
command_arguments.resize(common_arguments.size() - command_pos);
|
||||
copy(common_arguments.begin(), common_arguments.begin() + command_pos, global_flags.begin());
|
||||
copy(common_arguments.begin() + command_pos, common_arguments.end(), command_arguments.begin());
|
||||
void DisksApp::initializeHistoryFile()
|
||||
{
|
||||
String home_path;
|
||||
const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe)
|
||||
if (home_path_cstr)
|
||||
home_path = home_path_cstr;
|
||||
if (config().has("history-file"))
|
||||
history_file = config().getString("history-file");
|
||||
else
|
||||
history_file = home_path + "/.disks-file-history";
|
||||
|
||||
parseAndCheckOptions(options_description, positional_options_description, global_flags);
|
||||
if (!history_file.empty() && !fs::exists(history_file))
|
||||
{
|
||||
try
|
||||
{
|
||||
FS::createFile(history_file);
|
||||
}
|
||||
catch (const ErrnoException & e)
|
||||
{
|
||||
if (e.getErrno() != EEXIST)
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DisksApp::init(const std::vector<String> & common_arguments)
|
||||
{
|
||||
addOptions();
|
||||
parseAndCheckOptions(common_arguments, options_description, options);
|
||||
|
||||
po::notify(options);
|
||||
|
||||
if (options.count("help"))
|
||||
{
|
||||
printHelpMessage(options_description);
|
||||
printEntryHelpMessage();
|
||||
printAvailableCommandsHelpMessage();
|
||||
exit(0); // NOLINT(concurrency-mt-unsafe)
|
||||
}
|
||||
|
||||
if (!supported_commands.contains(command_name))
|
||||
{
|
||||
std::cerr << "Unknown command name: " << command_name << "\n";
|
||||
printHelpMessage(options_description);
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments");
|
||||
}
|
||||
|
||||
processOptions();
|
||||
}
|
||||
|
||||
void DisksApp::parseAndCheckOptions(
|
||||
ProgramOptionsDescription & options_description_,
|
||||
boost::program_options::positional_options_description & positional_options_description,
|
||||
std::vector<String> & arguments)
|
||||
String DisksApp::getDefaultConfigFileName()
|
||||
{
|
||||
auto parser = po::command_line_parser(arguments)
|
||||
.options(options_description_)
|
||||
.positional(positional_options_description)
|
||||
.allow_unregistered();
|
||||
|
||||
po::parsed_options parsed = parser.run();
|
||||
po::store(parsed, options);
|
||||
|
||||
auto positional_arguments = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
|
||||
for (const auto & arg : positional_arguments)
|
||||
{
|
||||
if (command_descriptions.contains(arg))
|
||||
{
|
||||
command_name = arg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return "/etc/clickhouse-server/config.xml";
|
||||
}
|
||||
|
||||
int DisksApp::main(const std::vector<String> & /*args*/)
|
||||
{
|
||||
std::vector<std::string> keys;
|
||||
config().keys(keys);
|
||||
if (config().has("config-file") || fs::exists(getDefaultConfigFileName()))
|
||||
{
|
||||
String config_path = config().getString("config-file", getDefaultConfigFileName());
|
||||
@ -173,9 +445,13 @@ int DisksApp::main(const std::vector<String> & /*args*/)
|
||||
}
|
||||
else
|
||||
{
|
||||
printEntryHelpMessage();
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified");
|
||||
}
|
||||
|
||||
config().keys(keys);
|
||||
initializeHistoryFile();
|
||||
|
||||
if (config().has("save-logs"))
|
||||
{
|
||||
auto log_level = config().getString("log-level", "trace");
|
||||
@ -200,61 +476,68 @@ int DisksApp::main(const std::vector<String> & /*args*/)
|
||||
global_context->setApplicationType(Context::ApplicationType::DISKS);
|
||||
|
||||
String path = config().getString("path", DBMS_DEFAULT_PATH);
|
||||
|
||||
global_context->setPath(path);
|
||||
|
||||
auto & command = command_descriptions[command_name];
|
||||
String main_disk = config().getString("disk", "default");
|
||||
|
||||
auto command_options = command->getCommandOptions();
|
||||
std::vector<String> args;
|
||||
if (command_options)
|
||||
auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; };
|
||||
|
||||
constexpr auto config_prefix = "storage_configuration.disks";
|
||||
auto disk_selector = std::make_shared<DiskSelector>(std::unordered_set<String>{"cache", "encrypted"});
|
||||
disk_selector->initialize(config(), config_prefix, global_context, validator);
|
||||
|
||||
std::vector<std::pair<DiskPtr, std::optional<String>>> disks_with_path;
|
||||
|
||||
for (const auto & [_, disk_ptr] : disk_selector->getDisksMap())
|
||||
{
|
||||
auto parser = po::command_line_parser(command_arguments).options(*command_options).allow_unregistered();
|
||||
po::parsed_options parsed = parser.run();
|
||||
po::store(parsed, options);
|
||||
po::notify(options);
|
||||
disks_with_path.emplace_back(
|
||||
disk_ptr, (disk_ptr->getName() == "local") ? std::optional{fs::current_path().string()} : std::nullopt);
|
||||
}
|
||||
|
||||
args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
|
||||
command->processOptions(config(), options);
|
||||
|
||||
client = std::make_unique<DisksClient>(std::move(disks_with_path), main_disk);
|
||||
|
||||
suggest.setCompletionsCallback([&](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); });
|
||||
|
||||
if (!query.has_value())
|
||||
{
|
||||
runInteractive();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto parser = po::command_line_parser(command_arguments).options({}).allow_unregistered();
|
||||
po::parsed_options parsed = parser.run();
|
||||
args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
|
||||
processQueryText(query.value());
|
||||
}
|
||||
|
||||
std::unordered_set<std::string> disks
|
||||
{
|
||||
config().getString("disk", "default"),
|
||||
config().getString("disk-from", config().getString("disk", "default")),
|
||||
config().getString("disk-to", config().getString("disk", "default")),
|
||||
};
|
||||
|
||||
auto validator = [&disks](
|
||||
const Poco::Util::AbstractConfiguration & config,
|
||||
const std::string & disk_config_prefix,
|
||||
const std::string & disk_name)
|
||||
{
|
||||
if (!disks.contains(disk_name))
|
||||
return false;
|
||||
|
||||
const auto disk_type = config.getString(disk_config_prefix + ".type", "local");
|
||||
|
||||
if (disk_type == "cache")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk type 'cache' of disk {} is not supported by clickhouse-disks", disk_name);
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
constexpr auto config_prefix = "storage_configuration.disks";
|
||||
auto disk_selector = std::make_shared<DiskSelector>();
|
||||
disk_selector->initialize(config(), config_prefix, global_context, validator);
|
||||
|
||||
command->execute(args, disk_selector, config());
|
||||
|
||||
return Application::EXIT_OK;
|
||||
}
|
||||
|
||||
DisksApp::~DisksApp()
|
||||
{
|
||||
client.reset(nullptr);
|
||||
if (global_context)
|
||||
global_context->shutdown();
|
||||
}
|
||||
|
||||
void DisksApp::runInteractiveTestMode()
|
||||
{
|
||||
for (String input; std::getline(std::cin, input);)
|
||||
{
|
||||
if (!processQueryText(input))
|
||||
break;
|
||||
|
||||
std::cout << "\a\a\a\a" << std::endl;
|
||||
std::cerr << std::flush;
|
||||
}
|
||||
}
|
||||
|
||||
void DisksApp::runInteractive()
|
||||
{
|
||||
if (config().hasOption("test-mode"))
|
||||
runInteractiveTestMode();
|
||||
else
|
||||
runInteractiveReplxx();
|
||||
}
|
||||
}
|
||||
|
||||
int mainEntryClickHouseDisks(int argc, char ** argv)
|
||||
@ -269,16 +552,16 @@ int mainEntryClickHouseDisks(int argc, char ** argv)
|
||||
catch (const DB::Exception & e)
|
||||
{
|
||||
std::cerr << DB::getExceptionMessage(e, false) << std::endl;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
catch (const boost::program_options::error & e)
|
||||
{
|
||||
std::cerr << "Bad arguments: " << e.what() << std::endl;
|
||||
return DB::ErrorCodes::BAD_ARGUMENTS;
|
||||
return 0;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::cerr << DB::getCurrentExceptionMessage(true) << std::endl;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -1,61 +1,107 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <Client/ReplxxLineReader.h>
|
||||
#include <Loggers/Loggers.h>
|
||||
#include "DisksClient.h"
|
||||
#include "ICommand_fwd.h"
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <boost/program_options/options_description.hpp>
|
||||
#include <boost/program_options/variables_map.hpp>
|
||||
#include <Poco/Util/Application.h>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ICommand;
|
||||
using CommandPtr = std::unique_ptr<ICommand>;
|
||||
|
||||
namespace po = boost::program_options;
|
||||
using ProgramOptionsDescription = boost::program_options::options_description;
|
||||
using CommandLineOptions = boost::program_options::variables_map;
|
||||
|
||||
class DisksApp : public Poco::Util::Application, public Loggers
|
||||
class DisksApp : public Poco::Util::Application
|
||||
{
|
||||
public:
|
||||
DisksApp() = default;
|
||||
~DisksApp() override;
|
||||
void addOptions();
|
||||
|
||||
void init(std::vector<String> & common_arguments);
|
||||
|
||||
int main(const std::vector<String> & args) override;
|
||||
|
||||
protected:
|
||||
static String getDefaultConfigFileName();
|
||||
|
||||
void addOptions(
|
||||
ProgramOptionsDescription & options_description,
|
||||
boost::program_options::positional_options_description & positional_options_description);
|
||||
void processOptions();
|
||||
|
||||
void printHelpMessage(ProgramOptionsDescription & command_option_description);
|
||||
bool processQueryText(const String & text);
|
||||
|
||||
size_t findCommandPos(std::vector<String> & common_arguments);
|
||||
void init(const std::vector<String> & common_arguments);
|
||||
|
||||
int main(const std::vector<String> & /*args*/) override;
|
||||
|
||||
CommandPtr getCommandByName(const String & command) const;
|
||||
|
||||
void initializeHistoryFile();
|
||||
|
||||
static void parseAndCheckOptions(
|
||||
const std::vector<String> & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options);
|
||||
|
||||
void printEntryHelpMessage() const;
|
||||
void printAvailableCommandsHelpMessage() const;
|
||||
void printCommandHelpMessage(String command_name) const;
|
||||
void printCommandHelpMessage(CommandPtr command) const;
|
||||
String getCommandLineWithAliases(CommandPtr command) const;
|
||||
|
||||
|
||||
std::vector<String> getCompletions(const String & prefix) const;
|
||||
|
||||
std::vector<String> getEmptyCompletion(String command_name) const;
|
||||
|
||||
~DisksApp() override;
|
||||
|
||||
private:
|
||||
void parseAndCheckOptions(
|
||||
ProgramOptionsDescription & options_description,
|
||||
boost::program_options::positional_options_description & positional_options_description,
|
||||
std::vector<String> & arguments);
|
||||
void runInteractive();
|
||||
void runInteractiveReplxx();
|
||||
void runInteractiveTestMode();
|
||||
|
||||
String getDefaultConfigFileName();
|
||||
|
||||
std::vector<String> getCommandsToComplete(const String & command_prefix) const;
|
||||
|
||||
// Fields responsible for the REPL work
|
||||
String history_file;
|
||||
LineReader::Suggest suggest;
|
||||
static LineReader::Patterns query_extenders;
|
||||
static LineReader::Patterns query_delimiters;
|
||||
static String word_break_characters;
|
||||
|
||||
// General command line arguments parsing fields
|
||||
|
||||
protected:
|
||||
ContextMutablePtr global_context;
|
||||
SharedContextHolder shared_context;
|
||||
|
||||
String command_name;
|
||||
std::vector<String> command_arguments;
|
||||
|
||||
std::unordered_set<String> supported_commands;
|
||||
ContextMutablePtr global_context;
|
||||
ProgramOptionsDescription options_description;
|
||||
CommandLineOptions options;
|
||||
std::unordered_map<String, CommandPtr> command_descriptions;
|
||||
|
||||
po::variables_map options;
|
||||
};
|
||||
std::optional<String> query;
|
||||
|
||||
const std::unordered_map<String, String> aliases
|
||||
= {{"cp", "copy"},
|
||||
{"mv", "move"},
|
||||
{"ls", "list"},
|
||||
{"list_disks", "list-disks"},
|
||||
{"ln", "link"},
|
||||
{"rm", "remove"},
|
||||
{"cat", "read"},
|
||||
{"r", "read"},
|
||||
{"w", "write"},
|
||||
{"create", "touch"},
|
||||
{"delete", "remove"},
|
||||
{"ls-disks", "list-disks"},
|
||||
{"ls_disks", "list-disks"},
|
||||
{"packed_io", "packed-io"},
|
||||
{"change-dir", "cd"},
|
||||
{"change_dir", "cd"},
|
||||
{"switch_disk", "switch-disk"},
|
||||
{"current", "current_disk_with_path"},
|
||||
{"current_disk", "current_disk_with_path"},
|
||||
{"current_path", "current_disk_with_path"},
|
||||
{"cur", "current_disk_with_path"}};
|
||||
|
||||
std::set<String> multidisk_commands = {"copy", "packed-io", "switch-disk", "cd"};
|
||||
|
||||
std::unique_ptr<DisksClient> client{};
|
||||
};
|
||||
}
|
||||
|
263
programs/disks/DisksClient.cpp
Normal file
@ -0,0 +1,263 @@
|
||||
#include "DisksClient.h"
|
||||
#include <Client/ClientBase.h>
|
||||
#include <Client/ReplxxLineReader.h>
|
||||
#include <Disks/registerDisks.h>
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
|
||||
#include <Formats/registerFormats.h>
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
};
|
||||
|
||||
namespace DB
|
||||
{
|
||||
DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional<String> path_) : disk(disk_)
|
||||
{
|
||||
if (path_.has_value())
|
||||
{
|
||||
if (!fs::path{path_.value()}.is_absolute())
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value());
|
||||
}
|
||||
path = path_.value();
|
||||
}
|
||||
else
|
||||
{
|
||||
path = String{"/"};
|
||||
}
|
||||
|
||||
String relative_path = normalizePathAndGetAsRelative(path);
|
||||
if (disk->isDirectory(relative_path) || (relative_path.empty() && (disk->isDirectory("/"))))
|
||||
{
|
||||
return;
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Initializing path {} (normalized path: {}) at disk {} is not a directory",
|
||||
path,
|
||||
relative_path,
|
||||
disk->getName());
|
||||
}
|
||||
|
||||
std::vector<String> DiskWithPath::listAllFilesByPath(const String & any_path) const
|
||||
{
|
||||
if (isDirectory(any_path))
|
||||
{
|
||||
std::vector<String> file_names;
|
||||
disk->listFiles(getRelativeFromRoot(any_path), file_names);
|
||||
return file_names;
|
||||
}
|
||||
else
|
||||
{
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<String> DiskWithPath::getAllFilesByPattern(const String & pattern) const
|
||||
{
|
||||
auto [path_before, path_after] = [&]() -> std::pair<String, String>
|
||||
{
|
||||
auto slash_pos = pattern.find_last_of('/');
|
||||
if (slash_pos >= pattern.size())
|
||||
{
|
||||
return {"", pattern};
|
||||
}
|
||||
else
|
||||
{
|
||||
return {pattern.substr(0, slash_pos + 1), pattern.substr(slash_pos + 1, pattern.size() - slash_pos - 1)};
|
||||
}
|
||||
}();
|
||||
|
||||
if (!isDirectory(path_before))
|
||||
{
|
||||
return {};
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<String> file_names = listAllFilesByPath(path_before);
|
||||
|
||||
std::vector<String> answer;
|
||||
|
||||
for (const auto & file_name : file_names)
|
||||
{
|
||||
if (file_name.starts_with(path_after))
|
||||
{
|
||||
String file_pattern = path_before + file_name;
|
||||
if (isDirectory(file_pattern))
|
||||
{
|
||||
file_pattern = file_pattern + "/";
|
||||
}
|
||||
answer.push_back(file_pattern);
|
||||
}
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
};
|
||||
|
||||
void DiskWithPath::setPath(const String & any_path)
|
||||
{
|
||||
if (isDirectory(any_path))
|
||||
{
|
||||
path = getAbsolutePath(any_path);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} at disk {} is not a directory", any_path, disk->getName());
|
||||
}
|
||||
}
|
||||
|
||||
String DiskWithPath::validatePathAndGetAsRelative(const String & path)
|
||||
{
|
||||
String lexically_normal_path = fs::path(path).lexically_normal();
|
||||
if (lexically_normal_path.find("..") != std::string::npos)
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path);
|
||||
|
||||
/// If the path is absolute we should keep it as relative inside the disk, so the disk will look like
|
||||
/// an ordinary filesystem with a root.
|
||||
if (fs::path(lexically_normal_path).is_absolute())
|
||||
return lexically_normal_path.substr(1);
|
||||
|
||||
return lexically_normal_path;
|
||||
}
|
||||
|
||||
String DiskWithPath::normalizePathAndGetAsRelative(const String & messyPath)
|
||||
{
|
||||
std::filesystem::path path(messyPath);
|
||||
std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path);
|
||||
String npath = canonical_path.make_preferred().string();
|
||||
return validatePathAndGetAsRelative(npath);
|
||||
}
|
||||
|
||||
String DiskWithPath::normalizePath(const String & path)
|
||||
{
|
||||
std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path);
|
||||
return canonical_path.make_preferred().string();
|
||||
}
|
||||
|
||||
DisksClient::DisksClient(std::vector<std::pair<DiskPtr, std::optional<String>>> && disks_with_paths, std::optional<String> begin_disk)
|
||||
{
|
||||
if (disks_with_paths.empty())
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing array of disks is empty");
|
||||
}
|
||||
if (!begin_disk.has_value())
|
||||
{
|
||||
begin_disk = disks_with_paths[0].first->getName();
|
||||
}
|
||||
bool has_begin_disk = false;
|
||||
for (auto & [disk, path] : disks_with_paths)
|
||||
{
|
||||
addDisk(disk, path);
|
||||
if (disk->getName() == begin_disk.value())
|
||||
{
|
||||
has_begin_disk = true;
|
||||
}
|
||||
}
|
||||
if (!has_begin_disk)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no begin_disk '{}' in initializing array", begin_disk.value());
|
||||
}
|
||||
current_disk = std::move(begin_disk.value());
|
||||
}
|
||||
|
||||
const DiskWithPath & DisksClient::getDiskWithPath(const String & disk) const
|
||||
{
|
||||
try
|
||||
{
|
||||
return disks.at(disk);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk);
|
||||
}
|
||||
}
|
||||
|
||||
DiskWithPath & DisksClient::getDiskWithPath(const String & disk)
|
||||
{
|
||||
try
|
||||
{
|
||||
return disks.at(disk);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk);
|
||||
}
|
||||
}
|
||||
|
||||
const DiskWithPath & DisksClient::getCurrentDiskWithPath() const
|
||||
{
|
||||
try
|
||||
{
|
||||
return disks.at(current_disk);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client");
|
||||
}
|
||||
}
|
||||
|
||||
DiskWithPath & DisksClient::getCurrentDiskWithPath()
|
||||
{
|
||||
try
|
||||
{
|
||||
return disks.at(current_disk);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client");
|
||||
}
|
||||
}
|
||||
|
||||
void DisksClient::switchToDisk(const String & disk_, const std::optional<String> & path_)
|
||||
{
|
||||
if (disks.contains(disk_))
|
||||
{
|
||||
if (path_.has_value())
|
||||
{
|
||||
disks.at(disk_).setPath(path_.value());
|
||||
}
|
||||
current_disk = disk_;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk_);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<String> DisksClient::getAllDiskNames() const
|
||||
{
|
||||
std::vector<String> answer{};
|
||||
answer.reserve(disks.size());
|
||||
for (const auto & [disk_name, _] : disks)
|
||||
{
|
||||
answer.push_back(disk_name);
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
|
||||
std::vector<String> DisksClient::getAllFilesByPatternFromAllDisks(const String & pattern) const
|
||||
{
|
||||
std::vector<String> answer{};
|
||||
for (const auto & [_, disk] : disks)
|
||||
{
|
||||
for (auto & word : disk.getAllFilesByPattern(pattern))
|
||||
{
|
||||
answer.push_back(word);
|
||||
}
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
|
||||
void DisksClient::addDisk(DiskPtr disk_, const std::optional<String> & path_)
|
||||
{
|
||||
String disk_name = disk_->getName();
|
||||
if (disks.contains(disk_->getName()))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' already exists", disk_name);
|
||||
}
|
||||
disks.emplace(disk_name, DiskWithPath{disk_, path_});
|
||||
}
|
||||
}
|
89
programs/disks/DisksClient.h
Normal file
@ -0,0 +1,89 @@
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <Client/ReplxxLineReader.h>
|
||||
#include <Loggers/Loggers.h>
|
||||
#include "Disks/IDisk.h"
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <boost/program_options/options_description.hpp>
|
||||
#include <boost/program_options/variables_map.hpp>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
std::vector<String> split(const String & text, const String & delimiters);
|
||||
|
||||
using ProgramOptionsDescription = boost::program_options::options_description;
|
||||
using CommandLineOptions = boost::program_options::variables_map;
|
||||
|
||||
class DiskWithPath
|
||||
{
|
||||
public:
|
||||
explicit DiskWithPath(DiskPtr disk_, std::optional<String> path_ = std::nullopt);
|
||||
|
||||
String getAbsolutePath(const String & any_path) const { return normalizePath(fs::path(path) / any_path); }
|
||||
|
||||
String getCurrentPath() const { return path; }
|
||||
|
||||
bool isDirectory(const String & any_path) const
|
||||
{
|
||||
return disk->isDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->isDirectory("/")));
|
||||
}
|
||||
|
||||
std::vector<String> listAllFilesByPath(const String & any_path) const;
|
||||
|
||||
std::vector<String> getAllFilesByPattern(const String & pattern) const;
|
||||
|
||||
DiskPtr getDisk() const { return disk; }
|
||||
|
||||
void setPath(const String & any_path);
|
||||
|
||||
String getRelativeFromRoot(const String & any_path) const { return normalizePathAndGetAsRelative(getAbsolutePath(any_path)); }
|
||||
|
||||
private:
|
||||
static String validatePathAndGetAsRelative(const String & path);
|
||||
static std::string normalizePathAndGetAsRelative(const std::string & messyPath);
|
||||
static std::string normalizePath(const std::string & messyPath);
|
||||
|
||||
const DiskPtr disk;
|
||||
String path;
|
||||
};
|
||||
|
||||
class DisksClient
|
||||
{
|
||||
public:
|
||||
explicit DisksClient(std::vector<std::pair<DiskPtr, std::optional<String>>> && disks_with_paths, std::optional<String> begin_disk);
|
||||
|
||||
const DiskWithPath & getDiskWithPath(const String & disk) const;
|
||||
|
||||
DiskWithPath & getDiskWithPath(const String & disk);
|
||||
|
||||
const DiskWithPath & getCurrentDiskWithPath() const;
|
||||
|
||||
DiskWithPath & getCurrentDiskWithPath();
|
||||
|
||||
DiskPtr getCurrentDisk() const { return getCurrentDiskWithPath().getDisk(); }
|
||||
|
||||
DiskPtr getDisk(const String & disk) const { return getDiskWithPath(disk).getDisk(); }
|
||||
|
||||
void switchToDisk(const String & disk_, const std::optional<String> & path_);
|
||||
|
||||
std::vector<String> getAllDiskNames() const;
|
||||
|
||||
std::vector<String> getAllFilesByPatternFromAllDisks(const String & pattern) const;
|
||||
|
||||
|
||||
private:
|
||||
void addDisk(DiskPtr disk_, const std::optional<String> & path_);
|
||||
|
||||
String current_disk;
|
||||
std::unordered_map<String, DiskWithPath> disks;
|
||||
};
|
||||
}
|
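A minimal, hedged usage sketch of the client API declared above (a fragment, not part of the commit; `local_disk`, its name "local" and the surrounding includes are assumptions — the real wiring lives in DisksApp::main()):

    // Assumes <iostream> and a DiskPtr `local_disk` obtained from a DiskSelector.
    std::vector<std::pair<DiskPtr, std::optional<String>>> disks_with_path;
    disks_with_path.emplace_back(local_disk, std::optional<String>{fs::current_path().string()});

    DisksClient client(std::move(disks_with_path), /*begin_disk=*/"local");
    client.switchToDisk("local", "/");                                   // select the active disk and its path
    for (const auto & file : client.getCurrentDiskWithPath().getAllFilesByPattern(""))
        std::cout << file << '\n';                                       // every entry under the current path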
@ -1,5 +1,5 @@
|
||||
#include "ICommand.h"
|
||||
#include <iostream>
|
||||
#include "DisksClient.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -10,43 +10,42 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
void ICommand::printHelpMessage() const
|
||||
CommandLineOptions ICommand::processCommandLineArguments(const Strings & commands)
|
||||
{
|
||||
std::cout << "Command: " << command_name << '\n';
|
||||
std::cout << "Description: " << description << '\n';
|
||||
std::cout << "Usage: " << usage << '\n';
|
||||
CommandLineOptions options;
|
||||
auto parser = po::command_line_parser(commands);
|
||||
parser.options(options_description).positional(positional_options_description);
|
||||
|
||||
if (command_option_description)
|
||||
po::parsed_options parsed = parser.run();
|
||||
po::store(parsed, options);
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
void ICommand::execute(const Strings & commands, DisksClient & client)
|
||||
{
|
||||
try
|
||||
{
|
||||
auto options = *command_option_description;
|
||||
if (!options.options().empty())
|
||||
std::cout << options << '\n';
|
||||
processCommandLineArguments(commands);
|
||||
}
|
||||
catch (std::exception & exc)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}", exc.what());
|
||||
}
|
||||
executeImpl(processCommandLineArguments(commands), client);
|
||||
}
|
||||
|
||||
DiskWithPath & ICommand::getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name)
|
||||
{
|
||||
auto disk_name = getValueFromCommandLineOptionsWithOptional<String>(options, name);
|
||||
if (disk_name.has_value())
|
||||
{
|
||||
return client.getDiskWithPath(disk_name.value());
|
||||
}
|
||||
else
|
||||
{
|
||||
return client.getCurrentDiskWithPath();
|
||||
}
|
||||
}
|
||||
|
||||
void ICommand::addOptions(ProgramOptionsDescription & options_description)
|
||||
{
|
||||
if (!command_option_description || command_option_description->options().empty())
|
||||
return;
|
||||
|
||||
options_description.add(*command_option_description);
|
||||
}
|
||||
|
||||
String ICommand::validatePathAndGetAsRelative(const String & path)
|
||||
{
|
||||
/// If the path contains non-normalized symbols like '.' we will normalize them. If the resulting normalized path
|
||||
/// still contains '..' it can be dangerous, so disallow such paths. Also, since clickhouse-disks
|
||||
/// is not an interactive program (it doesn't track your current path), it's OK to disallow '..' paths.
|
||||
String lexically_normal_path = fs::path(path).lexically_normal();
|
||||
if (lexically_normal_path.find("..") != std::string::npos)
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path);
|
||||
|
||||
/// If the path is absolute we should keep it as relative inside the disk, so the disk will look like
|
||||
/// an ordinary filesystem with a root.
|
||||
if (fs::path(lexically_normal_path).is_absolute())
|
||||
return lexically_normal_path.substr(1);
|
||||
|
||||
return lexically_normal_path;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,66 +1,146 @@
|
||||
#pragma once
|
||||
|
||||
#include <Disks/IDisk.h>
|
||||
#include <optional>
|
||||
#include <Disks/DiskSelector.h>
|
||||
#include <Disks/IDisk.h>
|
||||
|
||||
#include <boost/any/bad_any_cast.hpp>
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
#include <Poco/Util/Application.h>
|
||||
#include "Common/Exception.h"
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
|
||||
#include <memory>
|
||||
#include <boost/program_options/positional_options.hpp>
|
||||
|
||||
#include "DisksApp.h"
|
||||
|
||||
#include "DisksClient.h"
|
||||
|
||||
#include "ICommand_fwd.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace po = boost::program_options;
|
||||
using ProgramOptionsDescription = boost::program_options::options_description;
|
||||
using CommandLineOptions = boost::program_options::variables_map;
|
||||
using ProgramOptionsDescription = po::options_description;
|
||||
using PositionalProgramOptionsDescription = po::positional_options_description;
|
||||
using CommandLineOptions = po::variables_map;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
class ICommand
|
||||
{
|
||||
public:
|
||||
ICommand() = default;
|
||||
explicit ICommand() = default;
|
||||
|
||||
virtual ~ICommand() = default;
|
||||
|
||||
virtual void execute(
|
||||
const std::vector<String> & command_arguments,
|
||||
std::shared_ptr<DiskSelector> & disk_selector,
|
||||
Poco::Util::LayeredConfiguration & config) = 0;
|
||||
void execute(const Strings & commands, DisksClient & client);
|
||||
|
||||
const std::optional<ProgramOptionsDescription> & getCommandOptions() const { return command_option_description; }
|
||||
virtual void executeImpl(const CommandLineOptions & options, DisksClient & client) = 0;
|
||||
|
||||
void addOptions(ProgramOptionsDescription & options_description);
|
||||
|
||||
virtual void processOptions(Poco::Util::LayeredConfiguration & config, po::variables_map & options) const = 0;
|
||||
CommandLineOptions processCommandLineArguments(const Strings & commands);
|
||||
|
||||
protected:
|
||||
void printHelpMessage() const;
|
||||
template <typename T>
|
||||
static T getValueFromCommandLineOptions(const CommandLineOptions & options, const String & name)
|
||||
{
|
||||
try
|
||||
{
|
||||
return options[name].as<T>();
|
||||
}
|
||||
catch (boost::bad_any_cast &)
|
||||
{
|
||||
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument '{}' has wrong type and can't be parsed", name);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static T getValueFromCommandLineOptionsThrow(const CommandLineOptions & options, const String & name)
|
||||
{
|
||||
if (options.count(name))
|
||||
{
|
||||
return getValueFromCommandLineOptions<T>(options, name);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Mandatory argument '{}' is missing", name);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static T getValueFromCommandLineOptionsWithDefault(const CommandLineOptions & options, const String & name, const T & default_value)
|
||||
{
|
||||
if (options.count(name))
|
||||
{
|
||||
return getValueFromCommandLineOptions<T>(options, name);
|
||||
}
|
||||
else
|
||||
{
|
||||
return default_value;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::optional<T> getValueFromCommandLineOptionsWithOptional(const CommandLineOptions & options, const String & name)
|
||||
{
|
||||
if (options.count(name))
|
||||
{
|
||||
return std::optional{getValueFromCommandLineOptions<T>(options, name)};
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
DiskWithPath & getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name);
|
||||
|
||||
String getTargetLocation(const String & path_from, DiskWithPath & disk_to, const String & path_to)
|
||||
{
|
||||
if (!disk_to.getDisk()->isDirectory(path_to))
|
||||
{
|
||||
return path_to;
|
||||
}
|
||||
String copied_path_from = path_from;
|
||||
if (copied_path_from.ends_with('/'))
|
||||
{
|
||||
copied_path_from.pop_back();
|
||||
}
|
||||
String plain_filename = fs::path(copied_path_from).filename();
|
||||
|
||||
return fs::path{path_to} / plain_filename;
|
||||
}
|
||||
|
||||
static String validatePathAndGetAsRelative(const String & path);
|
||||
|
||||
public:
|
||||
String command_name;
|
||||
String description;
|
||||
ProgramOptionsDescription options_description;
|
||||
|
||||
protected:
|
||||
std::optional<ProgramOptionsDescription> command_option_description;
|
||||
String usage;
|
||||
po::positional_options_description positional_options_description;
|
||||
PositionalProgramOptionsDescription positional_options_description;
|
||||
};
|
||||
|
||||
using CommandPtr = std::unique_ptr<ICommand>;
|
||||
|
||||
}
|
||||
|
||||
DB::CommandPtr makeCommandCopy();
|
||||
DB::CommandPtr makeCommandLink();
|
||||
DB::CommandPtr makeCommandList();
|
||||
DB::CommandPtr makeCommandListDisks();
|
||||
DB::CommandPtr makeCommandList();
|
||||
DB::CommandPtr makeCommandChangeDirectory();
|
||||
DB::CommandPtr makeCommandLink();
|
||||
DB::CommandPtr makeCommandMove();
|
||||
DB::CommandPtr makeCommandRead();
|
||||
DB::CommandPtr makeCommandRemove();
|
||||
DB::CommandPtr makeCommandWrite();
|
||||
DB::CommandPtr makeCommandMkDir();
|
||||
DB::CommandPtr makeCommandSwitchDisk();
|
||||
DB::CommandPtr makeCommandGetCurrentDiskAndPath();
|
||||
DB::CommandPtr makeCommandHelp(const DisksApp & disks_app);
|
||||
DB::CommandPtr makeCommandTouch();
|
||||
#ifdef CLICKHOUSE_CLOUD
|
||||
DB::CommandPtr makeCommandPackedIO();
|
||||
#endif
|
||||
}
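To show how the option helpers above are meant to be used, a hedged sketch of a hypothetical command follows (the class, its option and its behaviour are invented for illustration; real commands live in the Command*.cpp files of this directory, and the sketch assumes <iostream>):

    class CommandExample final : public ICommand   /// hypothetical, for illustration only
    {
    public:
        CommandExample()
        {
            command_name = "example";
            description = "Print the size of a file";
            options_description.add_options()("path", po::value<String>(), "path to a file");
            positional_options_description.add("path", 1);
        }

        void executeImpl(const CommandLineOptions & options, DisksClient & client) override
        {
            auto & disk = getDiskWithPath(client, options, "disk");
            String path = getValueFromCommandLineOptionsThrow<String>(options, "path");
            std::cout << disk.getDisk()->getFileSize(disk.getRelativeFromRoot(path)) << '\n';
        }
    };

    /// A factory such as `CommandPtr makeCommandExample() { return std::make_shared<CommandExample>(); }`
    /// would then be registered in DisksApp::addOptions().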
|
||||
|
10
programs/disks/ICommand_fwd.h
Normal file
@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class ICommand;
|
||||
|
||||
using CommandPtr = std::shared_ptr<ICommand>;
|
||||
}
|
@ -264,6 +264,35 @@ HTTPContextPtr httpContext()
|
||||
return std::make_shared<KeeperHTTPContext>(Context::getGlobalContextInstance());
|
||||
}
|
||||
|
||||
String getKeeperPath(Poco::Util::LayeredConfiguration & config)
|
||||
{
|
||||
String path;
|
||||
if (config.has("keeper_server.storage_path"))
|
||||
{
|
||||
path = config.getString("keeper_server.storage_path");
|
||||
}
|
||||
else if (config.has("keeper_server.log_storage_path"))
|
||||
{
|
||||
path = std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path();
|
||||
}
|
||||
else if (config.has("keeper_server.snapshot_storage_path"))
|
||||
{
|
||||
path = std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path();
|
||||
}
|
||||
else if (std::filesystem::is_directory(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination"))
|
||||
{
|
||||
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
|
||||
"By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly",
|
||||
KEEPER_DEFAULT_PATH, String{std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination"});
|
||||
}
|
||||
else
|
||||
{
|
||||
path = KEEPER_DEFAULT_PATH;
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
int Keeper::main(const std::vector<std::string> & /*args*/)
|
||||
@ -316,31 +345,7 @@ try
|
||||
|
||||
updateMemorySoftLimitInConfig(config());
|
||||
|
||||
std::string path;
|
||||
|
||||
if (config().has("keeper_server.storage_path"))
|
||||
{
|
||||
path = config().getString("keeper_server.storage_path");
|
||||
}
|
||||
else if (config().has("keeper_server.log_storage_path"))
|
||||
{
|
||||
path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path();
|
||||
}
|
||||
else if (config().has("keeper_server.snapshot_storage_path"))
|
||||
{
|
||||
path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path();
|
||||
}
|
||||
else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"))
|
||||
{
|
||||
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG,
|
||||
"By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly",
|
||||
KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"});
|
||||
}
|
||||
else
|
||||
{
|
||||
path = KEEPER_DEFAULT_PATH;
|
||||
}
|
||||
|
||||
std::string path = getKeeperPath(config());
|
||||
std::filesystem::create_directories(path);
|
||||
|
||||
/// Check that the process user id matches the owner of the data.
|
||||
@ -554,7 +559,7 @@ try
|
||||
auto main_config_reloader = std::make_unique<ConfigReloader>(
|
||||
config_path,
|
||||
extra_paths,
|
||||
config().getString("path", KEEPER_DEFAULT_PATH),
|
||||
getKeeperPath(config()),
|
||||
std::move(unused_cache),
|
||||
unused_event,
|
||||
[&](ConfigurationPtr config, bool /* initial_loading */)
|
||||
|
9
programs/keeper/conf.d/local.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
logger:
|
||||
log:
|
||||
"@remove": remove
|
||||
errorlog:
|
||||
"@remove": remove
|
||||
console: 1
|
||||
keeper_server:
|
||||
log_storage_path: ./logs
|
||||
snapshot_storage_path: ./snapshots
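A brief, hedged note on how this sample config interacts with the getKeeperPath() helper added above (paraphrasing its lookup order from the code, not quoting documentation):

    resolution order in getKeeperPath():
      1. keeper_server.storage_path, if set
      2. parent of keeper_server.log_storage_path        <- matches this local.yaml, resolving to "."
      3. parent of keeper_server.snapshot_storage_path
      4. an error if <path>/coordination already exists
      5. otherwise KEEPER_DEFAULT_PATH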
|
@ -587,6 +587,54 @@ static void sanityChecks(Server & server)
|
||||
}
|
||||
}
|
||||
|
||||
void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, ContextMutablePtr context, Poco::Logger * log)
|
||||
{
|
||||
try
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys keys;
|
||||
config.keys("startup_scripts", keys);
|
||||
|
||||
SetResultDetailsFunc callback;
|
||||
for (const auto & key : keys)
|
||||
{
|
||||
std::string full_prefix = "startup_scripts." + key;
|
||||
|
||||
if (config.has(full_prefix + ".condition"))
|
||||
{
|
||||
auto condition = config.getString(full_prefix + ".condition");
|
||||
auto condition_read_buffer = ReadBufferFromString(condition);
|
||||
auto condition_write_buffer = WriteBufferFromOwnString();
|
||||
|
||||
LOG_DEBUG(log, "Checking startup query condition `{}`", condition);
|
||||
executeQuery(condition_read_buffer, condition_write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {});
|
||||
|
||||
auto result = condition_write_buffer.str();
|
||||
|
||||
if (result != "1\n" && result != "true\n")
|
||||
{
|
||||
if (result != "0\n" && result != "false\n")
|
||||
context->addWarningMessage(fmt::format("The condition query returned `{}`, which can't be interpreted as a boolean (`0`, `false`, `1`, `true`). Will skip this query.", result));
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
LOG_DEBUG(log, "Condition is true, will execute the query next");
|
||||
}
|
||||
|
||||
auto query = config.getString(full_prefix + ".query");
|
||||
auto read_buffer = ReadBufferFromString(query);
|
||||
auto write_buffer = WriteBufferFromOwnString();
|
||||
|
||||
LOG_DEBUG(log, "Executing query `{}`", query);
|
||||
executeQuery(read_buffer, write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {});
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, "Failed to parse startup scripts file");
|
||||
}
|
||||
}
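Based purely on the keys this function reads, a hedged sketch of the corresponding server-config shape (the <on_startup> element name is an arbitrary placeholder; only startup_scripts, condition and query come from the code, and the condition's result must print 1 or true for the query to run):

    <startup_scripts>
        <on_startup>
            <condition>SELECT count() >= 0 FROM system.one</condition>
            <query>CREATE TABLE IF NOT EXISTS default.startup_marker (d Date) ENGINE = MergeTree ORDER BY d</query>
        </on_startup>
    </startup_scripts>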
|
||||
|
||||
static void initializeAzureSDKLogger(
|
||||
[[ maybe_unused ]] const ServerSettings & server_settings,
|
||||
[[ maybe_unused ]] int server_logs_level)
|
||||
@ -1990,6 +2038,11 @@ try
|
||||
/// otherwise there is a race condition between the system database initialization
|
||||
/// and creation of new tables in the database.
|
||||
waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks);
|
||||
|
||||
/// Startup scripts can depend on the system log tables.
|
||||
if (config().has("startup_scripts") && !server_settings.prepare_system_log_tables_on_startup.changed)
|
||||
global_context->setServerSetting("prepare_system_log_tables_on_startup", true);
|
||||
|
||||
/// After attaching system databases we can initialize system log.
|
||||
global_context->initializeSystemLogs();
|
||||
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
|
||||
@ -2138,6 +2191,9 @@ try
|
||||
load_metadata_tasks.clear();
|
||||
load_metadata_tasks.shrink_to_fit();
|
||||
|
||||
if (config().has("startup_scripts"))
|
||||
loadStartupScripts(config(), global_context, log);
|
||||
|
||||
{
|
||||
std::lock_guard lock(servers_lock);
|
||||
for (auto & server : servers)
|
||||
|
@ -33,7 +33,7 @@ public:
|
||||
return;
|
||||
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node || !function_node->isAggregateFunction())
|
||||
if (!function_node || !function_node->isAggregateFunction() || !function_node->getResultType()->equals(DataTypeUInt64()))
|
||||
return;
|
||||
|
||||
auto function_name = function_node->getFunctionName();
|
||||
|
@ -187,9 +187,9 @@ void HedgedConnections::sendQuery(
|
||||
modified_settings.group_by_two_level_threshold_bytes = 0;
|
||||
}
|
||||
|
||||
const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0;
|
||||
const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas();
|
||||
|
||||
if (offset_states.size() > 1 && enable_sample_offset_parallel_processing)
|
||||
if (offset_states.size() > 1 && enable_offset_parallel_processing)
|
||||
{
|
||||
modified_settings.parallel_replicas_count = offset_states.size();
|
||||
modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset;
|
||||
@ -201,7 +201,8 @@ void HedgedConnections::sendQuery(
|
||||
/// all servers involved in the distributed query processing.
|
||||
modified_settings.set("allow_experimental_analyzer", static_cast<bool>(modified_settings.allow_experimental_analyzer));
|
||||
|
||||
replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {});
|
||||
replica.connection->sendQuery(
|
||||
timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {});
|
||||
replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout);
|
||||
replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout);
|
||||
};
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Common/thread_local_rng.h>
|
||||
#include <Core/Protocol.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Interpreters/ClientInfo.h>
|
||||
@ -23,8 +24,8 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler)
|
||||
: settings(settings_)
|
||||
MultiplexedConnections::MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler)
|
||||
: context(std::move(context_)), settings(context->getSettingsRef())
|
||||
{
|
||||
connection.setThrottler(throttler);
|
||||
|
||||
@ -36,9 +37,9 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se
|
||||
}
|
||||
|
||||
|
||||
MultiplexedConnections::MultiplexedConnections(std::shared_ptr<Connection> connection_ptr_, const Settings & settings_, const ThrottlerPtr & throttler)
|
||||
: settings(settings_)
|
||||
, connection_ptr(connection_ptr_)
|
||||
MultiplexedConnections::MultiplexedConnections(
|
||||
std::shared_ptr<Connection> connection_ptr_, ContextPtr context_, const ThrottlerPtr & throttler)
|
||||
: context(std::move(context_)), settings(context->getSettingsRef()), connection_ptr(connection_ptr_)
|
||||
{
|
||||
connection_ptr->setThrottler(throttler);
|
||||
|
||||
@ -50,9 +51,8 @@ MultiplexedConnections::MultiplexedConnections(std::shared_ptr<Connection> conne
|
||||
}
|
||||
|
||||
MultiplexedConnections::MultiplexedConnections(
|
||||
std::vector<IConnectionPool::Entry> && connections,
|
||||
const Settings & settings_, const ThrottlerPtr & throttler)
|
||||
: settings(settings_)
|
||||
std::vector<IConnectionPool::Entry> && connections, ContextPtr context_, const ThrottlerPtr & throttler)
|
||||
: context(std::move(context_)), settings(context->getSettingsRef())
|
||||
{
|
||||
/// If we didn't get any connections from pool and getMany() did not throw exceptions, this means that
|
||||
/// `skip_unavailable_shards` was set. Then just return.
|
||||
@ -156,18 +156,18 @@ void MultiplexedConnections::sendQuery(
|
||||
/// all servers involved in the distributed query processing.
|
||||
modified_settings.set("allow_experimental_analyzer", static_cast<bool>(modified_settings.allow_experimental_analyzer));
|
||||
|
||||
const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0;
|
||||
const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas();
|
||||
|
||||
size_t num_replicas = replica_states.size();
|
||||
if (num_replicas > 1)
|
||||
{
|
||||
if (enable_sample_offset_parallel_processing)
|
||||
if (enable_offset_parallel_processing)
|
||||
/// Use multiple replicas for parallel query processing.
|
||||
modified_settings.parallel_replicas_count = num_replicas;
|
||||
|
||||
for (size_t i = 0; i < num_replicas; ++i)
|
||||
{
|
||||
if (enable_sample_offset_parallel_processing)
|
||||
if (enable_offset_parallel_processing)
|
||||
modified_settings.parallel_replica_offset = i;
|
||||
|
||||
replica_states[i].connection->sendQuery(
|
||||
|
@ -10,7 +10,6 @@
namespace DB
{

/** To retrieve data directly from multiple replicas (connections) from one shard
* within a single thread. As a degenerate case, it can also work with one connection.
* It is assumed that all functions except sendCancel are always executed in one thread.
@ -21,14 +20,12 @@ class MultiplexedConnections final : public IConnections
{
public:
/// Accepts ready connection.
MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_);
MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler_);
/// Accepts ready connection and keep it alive before drain
MultiplexedConnections(std::shared_ptr<Connection> connection_, const Settings & settings_, const ThrottlerPtr & throttler_);
MultiplexedConnections(std::shared_ptr<Connection> connection_, ContextPtr context_, const ThrottlerPtr & throttler_);

/// Accepts a vector of connections to replicas of one shard already taken from pool.
MultiplexedConnections(
std::vector<IConnectionPool::Entry> && connections,
const Settings & settings_, const ThrottlerPtr & throttler_);
MultiplexedConnections(std::vector<IConnectionPool::Entry> && connections, ContextPtr context_, const ThrottlerPtr & throttler_);

void sendScalarsData(Scalars & data) override;
void sendExternalTablesData(std::vector<ExternalTablesData> & data) override;
@ -86,6 +83,7 @@ private:
/// Mark the replica as invalid.
void invalidateReplica(ReplicaState & replica_state);

ContextPtr context;
const Settings & settings;

/// The current number of valid connections to the replicas of this shard.

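The refactoring in the two files above replaces the copied Settings argument with the query context, so that capabilities such as offset-based parallel replicas can be checked at sendQuery() time. A minimal standalone sketch of that idea follows; the types and the capability check are simplified stand-ins, not the real ClickHouse interfaces.

#include <memory>
#include <utility>

/// Illustrative only: simplified stand-ins, not the real ClickHouse interfaces.
struct Settings { unsigned max_parallel_replicas = 1; };

struct Context
{
    Settings settings;
    const Settings & getSettingsRef() const { return settings; }
    /// Simplified: the real check also takes the parallel replicas mode into account.
    bool canUseOffsetParallelReplicas() const { return settings.max_parallel_replicas > 1; }
};
using ContextPtr = std::shared_ptr<const Context>;

/// Holding the context instead of only a Settings reference lets the connection layer
/// ask such questions when the query is sent instead of receiving precomputed flags.
struct ConnectionsSketch
{
    explicit ConnectionsSketch(ContextPtr context_)
        : context(std::move(context_)), settings(context->getSettingsRef()) {}

    bool enableOffsetParallelProcessing() const { return context->canUseOffsetParallelReplicas(); }

    ContextPtr context;
    const Settings & settings;
};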
@ -711,7 +711,13 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c
ColumnPtr ColumnTuple::compress() const
{
if (columns.empty())
return Ptr();
{
return ColumnCompressed::create(size(), 0,
[n = column_length]
{
return ColumnTuple::create(n);
});
}

size_t byte_size = 0;
Columns compressed;

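The new branch handles a tuple column with zero nested columns: such a column carries only a row count, so compression captures that count in a lambda and decompression recreates an empty tuple of the same length. A rough standalone sketch of the same idea, with illustrative types rather than ClickHouse's ColumnCompressed API:

#include <cstddef>
#include <functional>

/// Illustrative only: an empty tuple column has no nested data, so "compressing" it just
/// captures the row count; decompression rebuilds a column of that many rows, which is
/// what the [n = column_length] lambda in the diff above does.
struct EmptyTupleColumnSketch { size_t rows = 0; };

inline std::function<EmptyTupleColumnSketch()> compressEmptyTupleSketch(size_t column_length)
{
    return [n = column_length] { return EmptyTupleColumnSketch{n}; };
}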
@ -5,7 +5,7 @@
namespace DB
{

static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2)
static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
{
if ((end - pos) & 1)
{
@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o
++out;
}

static void inline binStringDecode(const char * pos, const char * end, char *& out)
static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size)
{
if (pos == end)
{
@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o
++out;
}

assert((end - pos) % 8 == 0);
chassert((end - pos) % word_size == 0);

while (end - pos != 0)
{

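The change drops the default argument and threads word_size through both decoders, so the trailing-length check can be written once as chassert((end - pos) % word_size == 0). A self-contained sketch of a word-size-aware binary decoder in the same spirit; the name and signature are illustrative only, not the functions from the diff.

#include <cassert>
#include <cstddef>
#include <string>

/// Illustrative only: decode a '0'/'1' string where each output byte is encoded by
/// `word_size` characters (8 for bin strings), mirroring the generalized length check above.
inline std::string decodeBinStringSketch(const std::string & in, size_t word_size = 8)
{
    assert(in.size() % word_size == 0);
    std::string out;
    out.reserve(in.size() / word_size);
    for (size_t i = 0; i < in.size(); i += word_size)
    {
        unsigned char byte = 0;
        for (size_t j = 0; j < word_size; ++j)
            byte = static_cast<unsigned char>((byte << 1) | (in[i + j] == '1'));
        out.push_back(static_cast<char>(byte));
    }
    return out;
}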
@ -267,7 +267,7 @@
M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \
M(S3Requests, "S3 requests count") \
M(KeeperAliveConnections, "Number of alive connections") \
M(KeeperOutstandingRequets, "Number of outstanding requests") \
M(KeeperOutstandingRequests, "Number of outstanding requests") \
M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \
M(IOUringPendingEvents, "Number of io_uring SQEs waiting to be submitted") \
M(IOUringInFlightEvents, "Number of io_uring SQEs in flight") \

@ -372,7 +372,7 @@ extern const std::vector<Event> keeper_profile_events
M(AsynchronousReadWait) \
M(S3Requests) \
M(KeeperAliveConnections) \
M(KeeperOutstandingRequets) \
M(KeeperOutstandingRequests) \
M(ThreadsInOvercommitTracker) \
M(IOUringPendingEvents) \
M(IOUringInFlightEvents) \

@ -28,7 +28,7 @@
namespace CurrentMetrics
{
extern const Metric KeeperAliveConnections;
extern const Metric KeeperOutstandingRequets;
extern const Metric KeeperOutstandingRequests;
}

namespace ProfileEvents
@ -139,7 +139,7 @@ void KeeperDispatcher::requestThread()
{
if (requests_queue->tryPop(request, max_wait))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests);
if (shutdown_called)
break;

@ -171,7 +171,7 @@ void KeeperDispatcher::requestThread()
/// Trying to get batch requests as fast as possible
if (requests_queue->tryPop(request))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests);
/// Don't append read request into batch, we have to process them separately
if (!coordination_settings->quorum_reads && request.request->isReadRequest())
{
@ -419,7 +419,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
{
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push request to queue within operation timeout");
}
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests);
return true;
}

@ -543,7 +543,7 @@ void KeeperDispatcher::shutdown()
/// Set session expired for all pending requests
while (requests_queue && requests_queue->tryPop(request_for_session))
{
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests);
auto response = request_for_session.request->makeResponse();
response->error = Coordination::Error::ZSESSIONEXPIRED;
setResponse(request_for_session.session_id, response);
@ -670,7 +670,7 @@ void KeeperDispatcher::sessionCleanerTask()
};
if (!requests_queue->push(std::move(request_info)))
LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions");
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests);

/// Remove session from registered sessions
finishSession(dead_session);
@ -794,7 +794,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
/// Push new session request to queue
if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms))
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push session id request to queue within session timeout");
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests);

if (future.wait_for(std::chrono::milliseconds(session_timeout_ms)) != std::future_status::ready)
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot receive session id within session timeout");

@ -153,6 +153,7 @@ namespace DB
|
||||
M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \
|
||||
M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \
|
||||
M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \
|
||||
M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \
|
||||
M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
|
||||
M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \
|
||||
|
||||
|
@ -167,9 +167,6 @@ class IColumn;
|
||||
M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \
|
||||
M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \
|
||||
\
|
||||
M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \
|
||||
M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \
|
||||
\
|
||||
M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \
|
||||
M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
|
||||
M(Bool, alter_move_to_space_execute_async, false, "Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously", 0) \
|
||||
@ -205,21 +202,6 @@ class IColumn;
|
||||
M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \
|
||||
\
|
||||
M(NonZeroUInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled. Should be always greater than 0", 0) \
|
||||
M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \
|
||||
M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \
|
||||
M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \
|
||||
M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \
|
||||
M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \
|
||||
M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \
|
||||
\
|
||||
M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \
|
||||
M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \
|
||||
M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \
|
||||
M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
|
||||
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
|
||||
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
|
||||
M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \
|
||||
M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \
|
||||
\
|
||||
M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \
|
||||
\
|
||||
@ -251,8 +233,6 @@ class IColumn;
|
||||
M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \
|
||||
M(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, "Split parts ranges into intersecting and non intersecting during FINAL optimization", 0) \
|
||||
M(Bool, split_intersecting_parts_ranges_into_layers_final, true, "Split intersecting parts ranges into layers during FINAL optimization", 0) \
|
||||
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
|
||||
M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \
|
||||
\
|
||||
M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \
|
||||
M(Bool, mysql_map_string_to_text_in_show_columns, true, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Has an effect only when the connection is made through the MySQL wire protocol.", 0) \
|
||||
@ -341,7 +321,6 @@ class IColumn;
|
||||
M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
|
||||
\
|
||||
M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
|
||||
M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \
|
||||
\
|
||||
M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \
|
||||
M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \
|
||||
@ -392,7 +371,6 @@ class IColumn;
|
||||
M(Bool, empty_result_for_aggregation_by_constant_keys_on_empty_set, true, "Return empty result when aggregating by constant keys on empty set.", 0) \
|
||||
M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \
|
||||
M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \
|
||||
M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \
|
||||
M(Bool, enable_deflate_qpl_codec, false, "Enable/disable the DEFLATE_QPL codec.", 0) \
|
||||
M(Bool, enable_zstd_qat_codec, false, "Enable/disable the ZSTD_QAT codec.", 0) \
|
||||
M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
|
||||
@ -402,8 +380,7 @@ class IColumn;
|
||||
M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \
|
||||
M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \
|
||||
M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \
|
||||
M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \
|
||||
M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \
|
||||
\
|
||||
M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \
|
||||
M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \
|
||||
\
|
||||
@ -593,13 +570,6 @@ class IColumn;
|
||||
M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \
|
||||
M(UInt64, distributed_replica_max_ignored_errors, 0, "Number of errors that will be ignored while choosing replicas", 0) \
|
||||
\
|
||||
M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \
|
||||
M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \
|
||||
M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \
|
||||
M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \
|
||||
M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \
|
||||
M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \
|
||||
M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \
|
||||
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
|
||||
\
|
||||
M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \
|
||||
@ -639,8 +609,6 @@ class IColumn;
|
||||
M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \
|
||||
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
|
||||
M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
|
||||
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
|
||||
M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW <name> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \
|
||||
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
|
||||
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
|
||||
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
|
||||
@ -657,8 +625,6 @@ class IColumn;
|
||||
M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \
|
||||
M(Bool, cast_ipv4_ipv6_default_on_conversion_error, false, "CAST operator into IPv4, CAST operator into IPV6 type, toIPv4, toIPv6 functions will return default value instead of throwing exception on conversion error.", 0) \
|
||||
M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \
|
||||
M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \
|
||||
M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \
|
||||
M(Bool, system_events_show_zero_values, false, "When querying system.events or system.metrics tables, include all metrics, even with zero values.", 0) \
|
||||
M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Defines how MySQL types are converted to corresponding ClickHouse types. A comma separated list in any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. decimal: convert NUMERIC and DECIMAL types to Decimal when precision allows it. datetime64: convert DATETIME and TIMESTAMP types to DateTime64 instead of DateTime when precision is not 0. date2Date32: convert DATE to Date32 instead of Date. Takes precedence over date2String. date2String: convert DATE to String instead of Date. Overridden by datetime64.", 0) \
|
||||
M(Bool, optimize_trivial_insert_select, false, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
|
||||
@ -720,9 +686,6 @@ class IColumn;
|
||||
M(Bool, force_aggregate_partitions_independently, false, "Force the use of optimization when it is applicable, but heuristics decided not to use it", 0) \
|
||||
M(UInt64, max_number_of_partitions_for_independent_aggregation, 128, "Maximal number of partitions in table to apply optimization", 0) \
|
||||
M(Float, min_hit_rate_to_use_consecutive_keys_optimization, 0.5, "Minimal hit rate of a cache which is used for consecutive keys optimization in aggregation to keep it enabled", 0) \
|
||||
/** Experimental feature for moving data between shards. */ \
|
||||
\
|
||||
M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \
|
||||
\
|
||||
M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \
|
||||
M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \
|
||||
@ -762,6 +725,7 @@ class IColumn;
|
||||
M(Bool, query_plan_push_down_limit, true, "Allow to move LIMITs down in the query plan", 0) \
|
||||
M(Bool, query_plan_split_filter, true, "Allow to split filters in the query plan", 0) \
|
||||
M(Bool, query_plan_merge_expressions, true, "Allow to merge expressions in the query plan", 0) \
|
||||
M(Bool, query_plan_merge_filters, false, "Allow to merge filters in the query plan", 0) \
|
||||
M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \
|
||||
M(Bool, query_plan_convert_outer_join_to_inner_join, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values", 0) \
|
||||
M(Bool, query_plan_optimize_prewhere, true, "Allow to push down filter to PREWHERE expression for supported storages", 0) \
|
||||
@ -903,34 +867,11 @@ class IColumn;
|
||||
M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \
|
||||
M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \
|
||||
M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \
|
||||
\
|
||||
/** Experimental functions */ \
|
||||
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
|
||||
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
|
||||
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
|
||||
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
|
||||
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
|
||||
M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \
|
||||
M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \
|
||||
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
|
||||
M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
|
||||
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
|
||||
M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \
|
||||
M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \
|
||||
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
|
||||
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
|
||||
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
|
||||
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
|
||||
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
|
||||
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
|
||||
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
|
||||
M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \
|
||||
M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
|
||||
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \
|
||||
M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \
|
||||
M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \
|
||||
M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \
|
||||
M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0)\
|
||||
M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \
|
||||
M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \
|
||||
M(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, "Allows to set a default value for SQL SECURITY option when creating a materialized view.", 0) \
|
||||
M(String, default_view_definer, "CURRENT_USER", "Allows to set a default value for DEFINER option when creating view.", 0) \
|
||||
@ -940,6 +881,81 @@ class IColumn;
|
||||
M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \
|
||||
M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \
|
||||
M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \
|
||||
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
|
||||
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
|
||||
M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \
|
||||
\
|
||||
\
|
||||
/* ###################################### */ \
|
||||
/* ######## EXPERIMENTAL FEATURES ####### */ \
|
||||
/* ###################################### */ \
|
||||
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
|
||||
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
|
||||
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
|
||||
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
|
||||
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
|
||||
M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \
|
||||
M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \
|
||||
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
|
||||
M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
|
||||
M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \
|
||||
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
|
||||
M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \
|
||||
M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \
|
||||
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
|
||||
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
|
||||
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
|
||||
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
|
||||
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
|
||||
M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
|
||||
\
|
||||
M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \
|
||||
M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \
|
||||
\
|
||||
/* Parallel replicas */ \
|
||||
M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \
|
||||
M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \
|
||||
M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \
|
||||
M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \
|
||||
M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \
|
||||
M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \
|
||||
M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \
|
||||
M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \
|
||||
M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \
|
||||
M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
|
||||
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
|
||||
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
|
||||
M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \
|
||||
M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \
|
||||
\
|
||||
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
|
||||
M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \
|
||||
\
|
||||
M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \
|
||||
\
|
||||
/* Analyzer: It's not experimental anymore (WIP) */ \
|
||||
M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", IMPORTANT) \
|
||||
M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \
|
||||
\
|
||||
M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \
|
||||
M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \
|
||||
M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \
|
||||
\
|
||||
M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \
|
||||
M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \
|
||||
M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \
|
||||
M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \
|
||||
\
|
||||
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
|
||||
M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW <name> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \
|
||||
\
|
||||
M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \
|
||||
M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \
|
||||
\
|
||||
/** Experimental feature for moving data between shards. */ \
|
||||
M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \
|
||||
|
||||
/** End of experimental features */
|
||||
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS.
|
||||
|
@ -63,6 +63,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."},
{"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},
{"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"},
{"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"},
{"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"},
{"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"},

@ -2,9 +2,11 @@
#include <DataTypes/Serializations/SerializationDynamic.h>
#include <DataTypes/Serializations/SerializationDynamicElement.h>
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnDynamic.h>
#include <Columns/ColumnVariant.h>
#include <Core/Field.h>
@ -110,28 +112,58 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnDa
}

/// Extract nested subcolumn of requested dynamic subcolumn if needed.
if (!subcolumn_nested_name.empty())
/// If requested subcolumn is null map, it's processed separately as there is no Nullable type yet.
bool is_null_map_subcolumn = subcolumn_nested_name == "null";
if (is_null_map_subcolumn)
{
res->type = std::make_shared<DataTypeUInt8>();
}
else if (!subcolumn_nested_name.empty())
{
res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null);
if (!res)
return nullptr;
}

res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName());
res->type = makeNullableOrLowCardinalityNullableSafe(res->type);
res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn);
/// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()).
bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality();
if (!is_null_map_subcolumn && make_subcolumn_nullable)
res->type = makeNullableOrLowCardinalityNullableSafe(res->type);

if (data.column)
{
if (discriminator)
{
/// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to
/// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to
/// create full subcolumn from variant according to discriminators.
const auto & variant_column = assert_cast<const ColumnDynamic &>(*data.column).getVariantColumn();
auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator));
res->column = creator.create(res->column);
std::unique_ptr<ISerialization::ISubcolumnCreator> creator;
if (is_null_map_subcolumn)
creator = std::make_unique<SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator>(
variant_column.getLocalDiscriminatorsPtr(),
"",
*discriminator,
variant_column.localDiscriminatorByGlobal(*discriminator));
else
creator = std::make_unique<SerializationVariantElement::VariantSubcolumnCreator>(
variant_column.getLocalDiscriminatorsPtr(),
"",
*discriminator,
variant_column.localDiscriminatorByGlobal(*discriminator),
make_subcolumn_nullable);
res->column = creator->create(res->column);
}
/// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values.
else if (is_null_map_subcolumn)
{
/// Fill null map with 1 when there is no such Dynamic subcolumn.
auto column = ColumnUInt8::create();
assert_cast<ColumnUInt8 &>(*column).getData().resize_fill(data.column->size(), 1);
res->column = std::move(column);
}
else
{
/// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values.
auto column = res->type->createColumn();
column->insertManyDefaults(data.column->size());
res->column = std::move(column);

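For the new ".null" subcolumn the code above either builds a null map from the variant discriminators (when the Dynamic column actually contains the requested type) or fills it with 1s (when it does not). A standalone sketch of the discriminator-to-null-map step, with plain vectors standing in for ClickHouse columns and an illustrative function name:

#include <cstdint>
#include <cstddef>
#include <vector>

/// Illustrative only: a row of the requested variant subcolumn is non-NULL exactly when its
/// discriminator selects that variant; this is what the null-map subcolumn creator derives.
inline std::vector<uint8_t> nullMapForVariantSketch(const std::vector<int8_t> & discriminators, int8_t wanted)
{
    std::vector<uint8_t> null_map(discriminators.size(), 1);
    for (size_t i = 0; i < discriminators.size(); ++i)
        if (discriminators[i] == wanted)
            null_map[i] = 0;
    return null_map;
}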
@ -173,7 +173,7 @@ bool IDataType::hasDynamicSubcolumns() const
auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data)
{
has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData();
has_dynamic_subcolumns |= subcolumn_data.type && subcolumn_data.type->hasDynamicSubcolumnsData();
};
forEachSubcolumn(callback, data);
return has_dynamic_subcolumns;

@ -64,6 +64,9 @@ String ISerialization::Substream::toString() const
if (type == VariantElement)
return fmt::format("VariantElement({})", variant_element_name);

if (type == VariantElementNullMap)
return fmt::format("VariantElementNullMap({}.null)", variant_element_name);

return String(magic_enum::enum_name(type));
}

@ -195,6 +198,8 @@ String getNameForSubstreamPath(
stream_name += ".variant_offsets";
else if (it->type == Substream::VariantElement)
stream_name += "." + it->variant_element_name;
else if (it->type == Substream::VariantElementNullMap)
stream_name += "." + it->variant_element_name + ".null";
else if (it->type == SubstreamType::DynamicStructure)
stream_name += ".dynamic_structure";
}
@ -395,7 +400,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref
return path[last_elem].type == Substream::NullMap
|| path[last_elem].type == Substream::TupleElement
|| path[last_elem].type == Substream::ArraySizes
|| path[last_elem].type == Substream::VariantElement;
|| path[last_elem].type == Substream::VariantElement
|| path[last_elem].type == Substream::VariantElementNullMap;
}

ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)

@ -184,6 +184,7 @@
VariantOffsets,
VariantElements,
VariantElement,
VariantElementNullMap,

DynamicData,
DynamicStructure,
@ -256,6 +257,8 @@ public:

bool position_independent_encoding = true;

bool use_compact_variant_discriminators_serialization = false;

enum class DynamicStatisticsMode
{
NONE, /// Don't write statistics.
@ -434,6 +437,9 @@ protected:
template <typename State, typename StatePtr>
State * checkAndGetState(const StatePtr & state) const;

template <typename State, typename StatePtr>
static State * checkAndGetState(const StatePtr & state, const ISerialization * serialization);

[[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const;
};

@ -444,10 +450,16 @@ using SubstreamType = ISerialization::Substream::Type;

template <typename State, typename StatePtr>
State * ISerialization::checkAndGetState(const StatePtr & state) const
{
return checkAndGetState<State, StatePtr>(state, this);
}

template <typename State, typename StatePtr>
State * ISerialization::checkAndGetState(const StatePtr & state, const ISerialization * serialization)
{
if (!state)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Got empty state for {}", demangle(typeid(*this).name()));
"Got empty state for {}", demangle(typeid(*serialization).name()));

auto * state_concrete = typeid_cast<State *>(state.get());
if (!state_concrete)
@ -455,7 +467,7 @@ State * ISerialization::checkAndGetState(const StatePtr & state) const
auto & state_ref = *state;
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Invalid State for {}. Expected: {}, got {}",
demangle(typeid(*this).name()),
demangle(typeid(*serialization).name()),
demangle(typeid(State).name()),
demangle(typeid(state_ref).name()));
}

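The new static overload lets one serialization validate a deserialization state on behalf of another, while the error still names the right class. A simplified standalone sketch of that pattern; the struct and function names are illustrative, and std exceptions stand in for ClickHouse's Exception:

#include <memory>
#include <stdexcept>
#include <string>
#include <typeinfo>

/// Illustrative only: validate a type-erased state and report the owning object by name,
/// so a wrapper serialization can reuse the check on behalf of the object it wraps.
struct DeserializeStateSketch { virtual ~DeserializeStateSketch() = default; };

template <typename Concrete>
Concrete * checkAndGetStateSketch(const std::shared_ptr<DeserializeStateSketch> & state, const std::string & owner_name)
{
    if (!state)
        throw std::logic_error("Got empty state for " + owner_name);
    auto * concrete = dynamic_cast<Concrete *>(state.get());
    if (!concrete)
        throw std::logic_error("Invalid state for " + owner_name + ", got " + typeid(*state).name());
    return concrete;
}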
@ -1,5 +1,6 @@
#include <DataTypes/Serializations/SerializationDynamicElement.h>
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
#include <DataTypes/Serializations/SerializationDynamic.h>
#include <DataTypes/DataTypeVariant.h>
#include <DataTypes/DataTypeFactory.h>
@ -77,7 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix(
if (auto global_discr = assert_cast<const DataTypeVariant &>(*variant_type).tryGetVariantDiscriminator(dynamic_element_name))
{
settings.path.push_back(Substream::DynamicData);
dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElement>(nested_serialization, dynamic_element_name, *global_discr);
if (is_null_map_subcolumn)
dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElementNullMap>(dynamic_element_name, *global_discr);
else
dynamic_element_state->variant_serialization = std::make_shared<SerializationVariantElement>(nested_serialization, dynamic_element_name, *global_discr);
dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache);
settings.path.pop_back();
}
@ -98,7 +102,16 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams(
SubstreamsCache * cache) const
{
if (!state)
{
if (is_null_map_subcolumn)
{
auto mutable_column = result_column->assumeMutable();
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
data.resize_fill(data.size() + limit, 1);
}

return;
}

auto * dynamic_element_state = checkAndGetState<DeserializeBinaryBulkStateDynamicElement>(state);

@ -108,6 +121,12 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams(
dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
settings.path.pop_back();
}
else if (is_null_map_subcolumn)
{
auto mutable_column = result_column->assumeMutable();
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
data.resize_fill(data.size() + limit, 1);
}
else
{
auto mutable_column = result_column->assumeMutable();

@ -13,11 +13,11 @@ private:
/// To be able to deserialize Dynamic element as a subcolumn
/// we need its type name and global discriminator.
String dynamic_element_name;
bool is_null_map_subcolumn;

public:
SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_)
: SerializationWrapper(nested_)
, dynamic_element_name(dynamic_element_name_)
SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false)
: SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_)
{
}

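When a data part simply does not contain the requested variant, there is no deserialization state, and the ".null" subcolumn is extended with 1s so every row reads as NULL. The resize_fill calls above reduce to the following one-liner, sketched here with a plain vector instead of ColumnUInt8:

#include <cstdint>
#include <cstddef>
#include <vector>

/// Illustrative only: append `limit` rows that are all NULL to a null map,
/// the standalone equivalent of data.resize_fill(data.size() + limit, 1).
inline void appendAllNullRowsSketch(std::vector<uint8_t> & null_map, size_t limit)
{
    null_map.resize(null_map.size() + limit, 1);
}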
@ -1,5 +1,6 @@
#include <DataTypes/Serializations/SerializationVariant.h>
#include <DataTypes/Serializations/SerializationVariantElement.h>
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/Serializations/SerializationNamed.h>
@ -30,12 +31,18 @@ namespace ErrorCodes

struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState
{
std::vector<ISerialization::SerializeBinaryBulkStatePtr> states;
explicit SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode)
{
}

SerializationVariant::DiscriminatorsSerializationMode discriminators_mode;
std::vector<ISerialization::SerializeBinaryBulkStatePtr> variant_states;
};

struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState
{
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> states;
ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
std::vector<ISerialization::DeserializeBinaryBulkStatePtr> variant_states;
};

void SerializationVariant::enumerateStreams(
|
||||
@ -65,13 +72,19 @@ void SerializationVariant::enumerateStreams(
|
||||
|
||||
for (size_t i = 0; i < variants.size(); ++i)
|
||||
{
|
||||
settings.path.back().creator = std::make_shared<SerializationVariantElement::VariantSubcolumnCreator>(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i);
|
||||
DataTypePtr type = type_variant ? type_variant->getVariant(i) : nullptr;
|
||||
settings.path.back().creator = std::make_shared<SerializationVariantElement::VariantSubcolumnCreator>(
|
||||
local_discriminators,
|
||||
variant_names[i],
|
||||
i,
|
||||
column_variant ? column_variant->localDiscriminatorByGlobal(i) : i,
|
||||
!type || type->canBeInsideNullable() || type->lowCardinality());
|
||||
|
||||
auto variant_data = SubstreamData(variants[i])
|
||||
.withType(type_variant ? type_variant->getVariant(i) : nullptr)
|
||||
.withType(type)
|
||||
.withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr)
|
||||
.withSerializationInfo(data.serialization_info)
|
||||
.withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr);
|
||||
.withDeserializeState(variant_deserialize_state ? variant_deserialize_state->variant_states[i] : nullptr);
|
||||
|
||||
addVariantElementToPath(settings.path, i);
|
||||
settings.path.back().data = variant_data;
|
||||
@ -79,6 +92,24 @@ void SerializationVariant::enumerateStreams(
settings.path.pop_back();
}

/// Variant subcolumns like variant.Type have type Nullable(Type), so we want to support reading null map subcolumn from it: variant.Type.null.
/// Nullable column is created during deserialization of a variant subcolumn according to the discriminators, so we don't have actual Nullable
/// serialization with null map subcolumn. To be able to read null map subcolumn from the variant subcolumn we use special serialization
/// SerializationVariantElementNullMap.
auto null_map_data = SubstreamData(std::make_shared<SerializationNumber<UInt8>>())
.withType(type_variant ? std::make_shared<DataTypeUInt8>() : nullptr)
.withColumn(column_variant ? ColumnUInt8::create() : nullptr);

for (size_t i = 0; i < variants.size(); ++i)
{
settings.path.back().creator = std::make_shared<SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator>(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i);
settings.path.push_back(Substream::VariantElementNullMap);
settings.path.back().variant_element_name = variant_names[i];
settings.path.back().data = null_map_data;
callback(settings.path);
settings.path.pop_back();
}

settings.path.pop_back();
}
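As a minimal sketch of the idea behind the comment above (illustrative only, not code from this commit): the null map subcolumn of a variant element can be derived from the discriminators alone, without reading any variant data. The real logic lives in SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator further down in this diff; the loop below just restates it.

    // Illustrative sketch: build the null map of one variant purely from its discriminators.
    for (auto local_discr : local_discriminators_data)
        null_map_data.push_back(local_discr != local_variant_discriminator);   // 1 means NULL for this subcolumn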
@ -87,17 +118,26 @@ void SerializationVariant::serializeBinaryBulkStatePrefix(
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
auto * discriminators_stream = settings.getter(settings.path);
|
||||
settings.path.pop_back();
|
||||
|
||||
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>();
|
||||
variant_state->states.resize(variants.size());
|
||||
if (!discriminators_stream)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkStatePrefix");
|
||||
|
||||
UInt64 mode = settings.use_compact_variant_discriminators_serialization ? DiscriminatorsSerializationMode::COMPACT : DiscriminatorsSerializationMode::BASIC;
|
||||
writeBinaryLittleEndian(mode, *discriminators_stream);
|
||||
|
||||
const ColumnVariant & col = assert_cast<const ColumnVariant &>(column);
|
||||
auto variant_state = std::make_shared<SerializeBinaryBulkStateVariant>(mode);
|
||||
variant_state->variant_states.resize(variants.size());
|
||||
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
|
||||
for (size_t i = 0; i < variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]);
|
||||
variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
@ -116,7 +156,7 @@ void SerializationVariant::serializeBinaryBulkStateSuffix(
|
||||
for (size_t i = 0; i < variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->states[i]);
|
||||
variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
settings.path.pop_back();
|
||||
@ -128,14 +168,19 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsDeserializeStatesCache * cache) const
|
||||
{
|
||||
DeserializeBinaryBulkStatePtr discriminators_state = deserializeDiscriminatorsStatePrefix(settings, cache);
|
||||
if (!discriminators_state)
|
||||
return;
|
||||
|
||||
auto variant_state = std::make_shared<DeserializeBinaryBulkStateVariant>();
|
||||
variant_state->states.resize(variants.size());
|
||||
variant_state->discriminators_state = discriminators_state;
|
||||
variant_state->variant_states.resize(variants.size());
|
||||
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i < variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache);
|
||||
variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
@ -143,6 +188,29 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix(
|
||||
state = std::move(variant_state);
|
||||
}
|
||||
|
||||
ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
SubstreamsDeserializeStatesCache * cache)
|
||||
{
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
|
||||
DeserializeBinaryBulkStatePtr discriminators_state = nullptr;
|
||||
if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path))
|
||||
{
|
||||
discriminators_state = cached_state;
|
||||
}
|
||||
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||
{
|
||||
UInt64 mode;
|
||||
readBinaryLittleEndian(mode, *discriminators_stream);
|
||||
discriminators_state = std::make_shared<DeserializeBinaryBulkStateVariantDiscriminators>(mode);
|
||||
addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state);
|
||||
}
|
||||
|
||||
settings.path.pop_back();
|
||||
return discriminators_state;
|
||||
}
|
||||
|
||||
|
||||
void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(
|
||||
const IColumn & column,
|
||||
@ -165,13 +233,71 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
|
||||
auto * variant_state = checkAndGetState<SerializeBinaryBulkStateVariant>(state);
|
||||
|
||||
/// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate
|
||||
/// Don't write anything if column is empty.
|
||||
if (limit == 0)
|
||||
return;
|
||||
|
||||
/// Write number of rows in this granule in compact mode.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||
writeVarUInt(UInt64(limit), *discriminators_stream);
|
||||
|
||||
/// If the column has only one non-empty variant and no NULLs, we don't need to
/// calculate limits for the variants and can use the provided offset/limit as is.
|
||||
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||
{
|
||||
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
|
||||
|
||||
/// In compact mode write the format of the granule and single non-empty discriminator.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||
}
|
||||
/// For basic mode just serialize this discriminator limit times.
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||
}
|
||||
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
addVariantElementToPath(settings.path, non_empty_global_discr);
|
||||
/// We can use the same offset/limit as for whole Variant column
|
||||
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]);
|
||||
variants_statistics[variant_names[non_empty_global_discr]] += limit;
|
||||
settings.path.pop_back();
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
/// If column has only NULLs, just serialize NULL discriminators.
|
||||
else if (col.hasOnlyNulls())
|
||||
{
|
||||
/// In compact mode write single NULL_DISCRIMINATOR.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
|
||||
}
|
||||
/// In basic mode write NULL_DISCRIMINATOR limit times.
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < limit; ++i)
|
||||
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/// If offset = 0 and limit == col.size() we don't need to calculate
|
||||
/// offsets and limits for variants and need to just serialize whole columns.
|
||||
if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls())
|
||||
if ((offset == 0 && limit == col.size()))
|
||||
{
|
||||
/// First, serialize discriminators.
|
||||
/// If we have only NULLs or local and global discriminators are the same, just serialize the column as is.
|
||||
if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder())
|
||||
/// Here we are sure that column contains different discriminators, use plain granule format in compact mode.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT)
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream);
|
||||
|
||||
/// If local and global discriminators are the same, just serialize the column as is.
|
||||
if (col.hasGlobalVariantsOrder())
|
||||
{
|
||||
SerializationNumber<ColumnVariant::Discriminator>().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit);
|
||||
}
|
||||
@ -188,7 +314,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]);
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]);
|
||||
variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size();
|
||||
settings.path.pop_back();
|
||||
}
|
||||
@ -196,36 +322,16 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
return;
|
||||
}
|
||||
|
||||
/// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant.
|
||||
if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls())
|
||||
{
|
||||
/// First, serialize discriminators.
|
||||
/// We know that all discriminators are the same, so we just need to serialize this discriminator limit times.
|
||||
auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr);
|
||||
for (size_t i = 0; i != limit; ++i)
|
||||
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||
|
||||
/// Second, serialize non-empty variant (other variants are empty and we can skip their serialization).
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
addVariantElementToPath(settings.path, non_empty_global_discr);
|
||||
/// We can use the same offset/limit as for whole Variant column
|
||||
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]);
|
||||
variants_statistics[variant_names[non_empty_global_discr]] += limit;
|
||||
settings.path.pop_back();
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
|
||||
/// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant.
|
||||
const auto & local_discriminators = col.getLocalDiscriminators();
|
||||
const auto & offsets = col.getOffsets();
|
||||
std::vector<std::pair<size_t, size_t>> variant_offsets_and_limits(variants.size(), {0, 0});
|
||||
size_t end = offset + limit;
|
||||
size_t num_non_empty_variants_in_range = 0;
|
||||
ColumnVariant::Discriminator last_non_empty_variant_discr = 0;
|
||||
for (size_t i = offset; i < end; ++i)
|
||||
{
|
||||
auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]);
|
||||
writeBinaryLittleEndian(global_discr, *discriminators_stream);
|
||||
|
||||
if (global_discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
{
|
||||
/// If we see this discriminator for the first time, update offset
|
||||
@ -233,9 +339,38 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
variant_offsets_and_limits[global_discr].first = offsets[i];
|
||||
/// Update limit for this discriminator.
|
||||
++variant_offsets_and_limits[global_discr].second;
|
||||
++num_non_empty_variants_in_range;
|
||||
last_non_empty_variant_discr = global_discr;
|
||||
}
|
||||
}
|
||||
|
||||
/// In basic mode just serialize discriminators as is row by row.
|
||||
if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC)
|
||||
{
|
||||
for (size_t i = offset; i < end; ++i)
|
||||
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
|
||||
}
|
||||
/// In compact mode check if we have the same discriminator for all rows in this granule.
|
||||
/// First, check if all values in granule are NULLs.
|
||||
else if (num_non_empty_variants_in_range == 0)
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||
writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream);
|
||||
}
|
||||
/// Then, check if there is only 1 variant and no NULLs in this granule.
|
||||
else if (num_non_empty_variants_in_range == 1 && variant_offsets_and_limits[last_non_empty_variant_discr].second == limit)
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream);
|
||||
writeBinaryLittleEndian(last_non_empty_variant_discr, *discriminators_stream);
|
||||
}
|
||||
/// Otherwise there are different discriminators in this granule.
|
||||
else
|
||||
{
|
||||
writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream);
|
||||
for (size_t i = offset; i < end; ++i)
|
||||
writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream);
|
||||
}
|
||||
|
||||
/// Serialize variants in global order.
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
@ -249,7 +384,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian
|
||||
variant_offsets_and_limits[i].first,
|
||||
variant_offsets_and_limits[i].second,
|
||||
settings,
|
||||
variant_state->states[i]);
|
||||
variant_state->variant_states[i]);
|
||||
variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second;
|
||||
settings.path.pop_back();
|
||||
}
|
||||
@ -284,39 +419,68 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
|
||||
|
||||
/// First, deserialize discriminators.
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
|
||||
DeserializeBinaryBulkStateVariant * variant_state = nullptr;
|
||||
std::vector<size_t> variant_limits;
|
||||
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
|
||||
{
|
||||
variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
|
||||
col.getLocalDiscriminatorsPtr() = cached_discriminators;
|
||||
}
|
||||
else
|
||||
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||
{
|
||||
auto * discriminators_stream = settings.getter(settings.path);
|
||||
if (!discriminators_stream)
|
||||
return;
|
||||
variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
|
||||
auto * discriminators_state = checkAndGetState<DeserializeBinaryBulkStateVariantDiscriminators>(variant_state->discriminators_state);
|
||||
|
||||
/// Deserialize discriminators according to serialization mode.
|
||||
if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC)
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
|
||||
else
|
||||
variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, *discriminators_state);
|
||||
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0);
|
||||
addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr());
|
||||
}
|
||||
/// It may happen that there is no such stream, in this case just do nothing.
|
||||
else
|
||||
{
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
|
||||
settings.path.pop_back();
|
||||
|
||||
/// Second, calculate limits for each variant by iterating through new discriminators.
|
||||
std::vector<size_t> variant_limits(variants.size(), 0);
|
||||
auto & discriminators_data = col.getLocalDiscriminators();
|
||||
size_t discriminators_offset = discriminators_data.size() - limit;
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
/// Second, calculate limits for each variant by iterating through new discriminators
|
||||
/// if we didn't do it during discriminators deserialization.
|
||||
if (variant_limits.empty())
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
++variant_limits[discr];
|
||||
variant_limits.resize(variants.size(), 0);
|
||||
auto & discriminators_data = col.getLocalDiscriminators();
|
||||
|
||||
/// We can actually read less than limit discriminators and we cannot determine the actual number of read rows
|
||||
/// by discriminators column as it could be taken from the substreams cache. And we need actual number of read
|
||||
/// rows to fill offsets correctly later if they are not in the cache. We can determine if offsets column is in cache
|
||||
/// or not by comparing it with discriminators column size (they should be the same when offsets are in cache).
|
||||
/// If offsets are not in the cache, we can use its size to determine the actual number of read rows.
|
||||
size_t num_new_discriminators = limit;
|
||||
size_t offsets_size = col.getOffsetsPtr()->size();
|
||||
if (discriminators_data.size() > offsets_size)
|
||||
num_new_discriminators = discriminators_data.size() - offsets_size;
|
||||
size_t discriminators_offset = discriminators_data.size() - num_new_discriminators;
|
||||
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
++variant_limits[discr];
|
||||
}
|
||||
}
|
||||
|
||||
/// Now we can deserialize variants according to their limits.
|
||||
auto * variant_state = checkAndGetState<DeserializeBinaryBulkStateVariant>(state);
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache);
|
||||
variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->variant_states[i], cache);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
settings.path.pop_back();
|
||||
@ -336,20 +500,49 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & offsets = col.getOffsets();
|
||||
offsets.reserve(offsets.size() + limit);
|
||||
std::vector<size_t> variant_offsets;
|
||||
variant_offsets.reserve(variants.size());
|
||||
size_t num_non_empty_variants = 0;
|
||||
ColumnVariant::Discriminator last_non_empty_discr = 0;
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);
|
||||
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
offsets.emplace_back();
|
||||
else
|
||||
offsets.push_back(variant_offsets[discr]++);
|
||||
if (variant_limits[i])
|
||||
{
|
||||
++num_non_empty_variants;
|
||||
last_non_empty_discr = i;
|
||||
}
|
||||
|
||||
variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]);
|
||||
}
|
||||
|
||||
auto & discriminators_data = col.getLocalDiscriminators();
|
||||
auto & offsets = col.getOffsets();
|
||||
size_t num_new_offsets = discriminators_data.size() - offsets.size();
|
||||
offsets.reserve(offsets.size() + num_new_offsets);
|
||||
/// If only NULLs were read, fill offsets with 0.
|
||||
if (num_non_empty_variants == 0)
|
||||
{
|
||||
offsets.resize_fill(discriminators_data.size(), 0);
|
||||
}
|
||||
/// If only 1 variant and no NULLs were read, fill offsets with sequential offsets of this variant.
|
||||
else if (num_non_empty_variants == 1 && variant_limits[last_non_empty_discr] == num_new_offsets)
|
||||
{
|
||||
size_t first_offset = col.getVariantByLocalDiscriminator(last_non_empty_discr).size() - num_new_offsets;
|
||||
for (size_t i = 0; i != num_new_offsets; ++i)
|
||||
offsets.push_back(first_offset + i);
|
||||
}
|
||||
/// Otherwise iterate through discriminators and fill offsets accordingly.
|
||||
else
|
||||
{
|
||||
size_t start = offsets.size();
|
||||
for (size_t i = start; i != discriminators_data.size(); ++i)
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||
offsets.emplace_back();
|
||||
else
|
||||
offsets.push_back(variant_offsets[discr]++);
|
||||
}
|
||||
}
|
||||
|
||||
addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr());
|
||||
@ -357,6 +550,72 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams(
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
std::vector<size_t> SerializationVariant::deserializeCompactDiscriminators(
|
||||
DB::ColumnPtr & discriminators_column,
|
||||
size_t limit,
|
||||
ReadBuffer * stream,
|
||||
bool continuous_reading,
|
||||
DeserializeBinaryBulkStateVariantDiscriminators & state) const
|
||||
{
|
||||
auto & discriminators = assert_cast<ColumnVariant::ColumnDiscriminators &>(*discriminators_column->assumeMutable());
|
||||
auto & discriminators_data = discriminators.getData();
|
||||
|
||||
/// Reset state if we are reading from the start of the granule and not from the previous position in the file.
|
||||
if (!continuous_reading)
|
||||
state.remaining_rows_in_granule = 0;
|
||||
|
||||
/// Calculate limits for variants during discriminators deserialization.
|
||||
std::vector<size_t> variant_limits(variants.size(), 0);
|
||||
while (limit)
|
||||
{
|
||||
/// If we read all rows from current granule, start reading the next one.
|
||||
if (state.remaining_rows_in_granule == 0)
|
||||
{
|
||||
if (stream->eof())
|
||||
return variant_limits;
|
||||
|
||||
readDiscriminatorsGranuleStart(state, stream);
|
||||
}
|
||||
|
||||
size_t limit_in_granule = std::min(limit, state.remaining_rows_in_granule);
|
||||
if (state.granule_format == CompactDiscriminatorsGranuleFormat::COMPACT)
|
||||
{
|
||||
auto & data = discriminators.getData();
|
||||
data.resize_fill(data.size() + limit_in_granule, state.compact_discr);
|
||||
if (state.compact_discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
variant_limits[state.compact_discr] += limit_in_granule;
|
||||
}
|
||||
else
|
||||
{
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0);
|
||||
size_t start = discriminators_data.size() - limit_in_granule;
|
||||
for (size_t i = start; i != discriminators_data.size(); ++i)
|
||||
{
|
||||
ColumnVariant::Discriminator discr = discriminators_data[i];
|
||||
if (discr != ColumnVariant::NULL_DISCRIMINATOR)
|
||||
++variant_limits[discr];
|
||||
}
|
||||
}
|
||||
|
||||
state.remaining_rows_in_granule -= limit_in_granule;
|
||||
limit -= limit_in_granule;
|
||||
}
|
||||
|
||||
return variant_limits;
|
||||
}
|
||||
|
||||
void SerializationVariant::readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, DB::ReadBuffer * stream)
|
||||
{
|
||||
UInt64 granule_size;
|
||||
readVarUInt(granule_size, *stream);
|
||||
state.remaining_rows_in_granule = granule_size;
|
||||
UInt8 granule_format;
|
||||
readBinaryLittleEndian(granule_format, *stream);
|
||||
state.granule_format = static_cast<CompactDiscriminatorsGranuleFormat>(granule_format);
|
||||
if (granule_format == CompactDiscriminatorsGranuleFormat::COMPACT)
|
||||
readBinaryLittleEndian(state.compact_discr, *stream);
|
||||
}
|
||||
|
||||
void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const
|
||||
{
|
||||
path.push_back(Substream::VariantElement);
|
||||
|
@ -2,10 +2,18 @@
|
||||
|
||||
#include <DataTypes/Serializations/ISerialization.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_DATA;
|
||||
}
|
||||
|
||||
|
||||
/// Class for serializing/deserializing column with Variant type.
|
||||
/// It supports both text and binary bulk serializations/deserializations.
|
||||
///
|
||||
@ -18,6 +26,17 @@ namespace DB
///
/// During binary bulk serialization it transforms local discriminators
/// to global and serializes them into a separate stream VariantDiscriminators.
/// There are 2 modes of serialising discriminators:
/// Basic mode, when all discriminators are serialized as is row by row.
/// Compact mode, when we avoid writing the same discriminators in granules when there is
/// only one variant (or only NULLs) in the granule.
/// In compact mode we serialize granules in the following format:
/// <number of rows in granule><granule format><granule data>
/// There are 2 different formats of granule - plain and compact.
/// Plain format is used when there are different discriminators in this granule,
/// in this format all discriminators are serialized as is row by row.
/// Compact format is used when all discriminators are the same in this granule,
/// in this case only this single discriminator is serialized.
/// Each variant is serialized into a separate stream with path VariantElements/VariantElement
/// (VariantElements stream is needed for correct sub-columns creation). We store and serialize
/// variants in a sparse form (the size of a variant column equals to the number of its discriminator
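To make the granule layout described above concrete, here is a hedged sketch of what the writer emits per granule in compact mode. It is not code from this commit: `out`, `rows`, `discrs` and `all_same` are hypothetical names, while the write helpers and the granule formats are the ones used by the serialization code elsewhere in this diff.

    writeVarUInt(rows, out);                                                               // <number of rows in granule>
    if (all_same(discrs))
    {
        writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), out); // <granule format>
        writeBinaryLittleEndian(discrs.front(), out);                                     // single shared discriminator
    }
    else
    {
        writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), out);   // <granule format>
        for (auto d : discrs)
            writeBinaryLittleEndian(d, out);                                              // discriminators row by row
    }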
@ -32,6 +51,25 @@ namespace DB
class SerializationVariant : public ISerialization
{
public:
struct DiscriminatorsSerializationMode
{
enum Value
{
BASIC = 0, /// Store the whole discriminators column.
COMPACT = 1, /// Don't write discriminators in granule if all of them are the same.
};

static void checkMode(UInt64 mode)
{
if (mode > Value::COMPACT)
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationVariant discriminators column.");
}

explicit DiscriminatorsSerializationMode(UInt64 mode) : value(static_cast<Value>(mode)) { checkMode(mode); }

Value value;
};

using VariantSerializations = std::vector<SerializationPtr>;

explicit SerializationVariant(
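A short sketch of how this mode is consumed on the read path (illustrative only; it mirrors deserializeDiscriminatorsStatePrefix shown earlier in this diff, and assumes `discriminators_stream` already points at the VariantDiscriminators substream):

    UInt64 raw_mode;
    readBinaryLittleEndian(raw_mode, *discriminators_stream);              // written once by serializeBinaryBulkStatePrefix
    SerializationVariant::DiscriminatorsSerializationMode mode(raw_mode);  // constructor calls checkMode() and throws INCORRECT_DATA on unknown values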
@ -123,8 +161,44 @@ public:
static std::vector<size_t> getVariantsDeserializeTextOrder(const DataTypes & variant_types);

private:
friend SerializationVariantElement;
friend SerializationVariantElementNullMap;

void addVariantElementToPath(SubstreamPath & path, size_t i) const;

enum CompactDiscriminatorsGranuleFormat
{
PLAIN = 0, /// Granule has different discriminators and they are serialized as is row by row.
COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value.
};

struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState
{
explicit DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_)
{
}

DiscriminatorsSerializationMode mode;

/// Deserialize state of currently read granule in compact mode.
CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN;
size_t remaining_rows_in_granule = 0;
ColumnVariant::Discriminator compact_discr = 0;
};

static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix(
DeserializeBinaryBulkSettings & settings,
SubstreamsDeserializeStatesCache * cache);

std::vector<size_t> deserializeCompactDiscriminators(
ColumnPtr & discriminators_column,
size_t limit,
ReadBuffer * stream,
bool continuous_reading,
DeserializeBinaryBulkStateVariantDiscriminators & state) const;

static void readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, ReadBuffer * stream);

bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const;
@ -1,5 +1,6 @@
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
#include <DataTypes/Serializations/SerializationNumber.h>
|
||||
#include <DataTypes/Serializations/SerializationVariant.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
@ -12,7 +13,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}

struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState
{
/// During deserialization discriminators and variant streams can be shared.
/// For example we can read several variant elements together: "select v.UInt32, v.String from table",
@ -24,7 +25,7 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria
/// substream cache correctly.
ColumnPtr discriminators;
ColumnPtr variant;

ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
};
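In other words, because the substream cache hands back the whole discriminators column rather than only the freshly read range, the rows produced by the current call have to be recovered from sizes. An illustrative one-liner (not part of the commit; `num_new_rows` is a hypothetical name, the expression is the same one used for num_new_discriminators later in this diff):

    size_t num_new_rows = variant_element_state->discriminators->size() - result_column->size();   // tail of the cached column = rows read by this call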
@ -65,7 +66,12 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary
|
||||
void SerializationVariantElement::deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const
|
||||
{
|
||||
DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache);
|
||||
if (!discriminators_state)
|
||||
return;
|
||||
|
||||
auto variant_element_state = std::make_shared<DeserializeBinaryBulkStateVariantElement>();
|
||||
variant_element_state->discriminators_state = discriminators_state;
|
||||
|
||||
addVariantToPath(settings.path);
|
||||
nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache);
|
||||
@ -86,35 +92,61 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
auto * variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
|
||||
|
||||
/// First, deserialize discriminators from Variant column.
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
|
||||
DeserializeBinaryBulkStateVariantElement * variant_element_state = nullptr;
|
||||
std::optional<size_t> variant_limit;
|
||||
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
|
||||
{
|
||||
variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
|
||||
variant_element_state->discriminators = cached_discriminators;
|
||||
}
|
||||
else
|
||||
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||
{
|
||||
auto * discriminators_stream = settings.getter(settings.path);
|
||||
if (!discriminators_stream)
|
||||
return;
|
||||
variant_element_state = checkAndGetState<DeserializeBinaryBulkStateVariantElement>(state);
|
||||
auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(variant_element_state->discriminators_state);
|
||||
|
||||
/// If we started to read a new column, reinitialize discriminators column in deserialization state.
|
||||
if (!variant_element_state->discriminators || result_column->empty())
|
||||
variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create();
|
||||
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
|
||||
/// Deserialize discriminators according to serialization mode.
|
||||
if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC)
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
|
||||
else
|
||||
variant_limit = deserializeCompactDiscriminators(
|
||||
variant_element_state->discriminators,
|
||||
variant_discriminator,
|
||||
limit,
|
||||
discriminators_stream,
|
||||
settings.continuous_reading,
|
||||
variant_element_state->discriminators_state,
|
||||
this);
|
||||
|
||||
addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators);
|
||||
}
|
||||
else
|
||||
{
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
|
||||
settings.path.pop_back();
|
||||
|
||||
/// Iterate through new discriminators to calculate the limit for our variant.
|
||||
/// We could read less than limit discriminators, but we will need actual number of read rows later.
|
||||
size_t num_new_discriminators = variant_element_state->discriminators->size() - result_column->size();
|
||||
|
||||
/// Iterate through new discriminators to calculate the limit for our variant
|
||||
/// if we didn't do it during discriminators deserialization.
|
||||
const auto & discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_state->discriminators).getData();
|
||||
size_t discriminators_offset = variant_element_state->discriminators->size() - limit;
|
||||
size_t variant_limit = 0;
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
variant_limit += (discriminators_data[i] == variant_discriminator);
|
||||
size_t discriminators_offset = variant_element_state->discriminators->size() - num_new_discriminators;
|
||||
if (!variant_limit)
|
||||
{
|
||||
variant_limit = 0;
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
*variant_limit += (discriminators_data[i] == variant_discriminator);
|
||||
}
|
||||
|
||||
/// Now we know the limit for our variant and can deserialize it.
|
||||
|
||||
@ -125,19 +157,19 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
||||
auto & nullable_column = assert_cast<ColumnNullable &>(*mutable_column);
|
||||
NullMap & null_map = nullable_column.getNullMapData();
|
||||
/// If we have only our discriminator in range, fill null map with 0.
|
||||
if (variant_limit == limit)
|
||||
if (variant_limit == num_new_discriminators)
|
||||
{
|
||||
null_map.resize_fill(null_map.size() + limit, 0);
|
||||
null_map.resize_fill(null_map.size() + num_new_discriminators, 0);
|
||||
}
|
||||
/// If our discriminator does not occur in the current range, fill null map with 1.
|
||||
else if (variant_limit == 0)
|
||||
{
|
||||
null_map.resize_fill(null_map.size() + limit, 1);
|
||||
null_map.resize_fill(null_map.size() + num_new_discriminators, 1);
|
||||
}
|
||||
/// Otherwise we should iterate through discriminators to fill null map.
|
||||
else
|
||||
{
|
||||
null_map.reserve(null_map.size() + limit);
|
||||
null_map.reserve(null_map.size() + num_new_discriminators);
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
null_map.push_back(discriminators_data[i] != variant_discriminator);
|
||||
}
|
||||
@ -159,12 +191,12 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
||||
/// If nothing to deserialize, just insert defaults.
|
||||
if (variant_limit == 0)
|
||||
{
|
||||
mutable_column->insertManyDefaults(limit);
|
||||
mutable_column->insertManyDefaults(num_new_discriminators);
|
||||
return;
|
||||
}
|
||||
|
||||
addVariantToPath(settings.path);
|
||||
nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache);
|
||||
nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache);
|
||||
removeVariantFromPath(settings.path);
|
||||
|
||||
/// If nothing was deserialized when variant_limit > 0
|
||||
@ -173,16 +205,16 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
||||
/// In this case we should just insert default values.
|
||||
if (variant_element_state->variant->empty())
|
||||
{
|
||||
mutable_column->insertManyDefaults(limit);
|
||||
mutable_column->insertManyDefaults(num_new_discriminators);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t variant_offset = variant_element_state->variant->size() - variant_limit;
|
||||
size_t variant_offset = variant_element_state->variant->size() - *variant_limit;
|
||||
|
||||
/// If we have only our discriminator in range, insert the whole range to result column.
|
||||
if (variant_limit == limit)
|
||||
if (variant_limit == num_new_discriminators)
|
||||
{
|
||||
mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit);
|
||||
mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, *variant_limit);
|
||||
}
|
||||
/// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator.
|
||||
else
|
||||
@ -197,6 +229,59 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
||||
}
|
||||
}
|
||||
|
||||
size_t SerializationVariantElement::deserializeCompactDiscriminators(
|
||||
DB::ColumnPtr & discriminators_column,
|
||||
ColumnVariant::Discriminator variant_discriminator,
|
||||
size_t limit,
|
||||
DB::ReadBuffer * stream,
|
||||
bool continuous_reading,
|
||||
DeserializeBinaryBulkStatePtr & discriminators_state_,
|
||||
const ISerialization * serialization)
|
||||
{
|
||||
auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(discriminators_state_, serialization);
|
||||
auto & discriminators = assert_cast<ColumnVariant::ColumnDiscriminators &>(*discriminators_column->assumeMutable());
|
||||
auto & discriminators_data = discriminators.getData();
|
||||
|
||||
/// Reset state if we are reading from the start of the granule and not from the previous position in the file.
|
||||
if (!continuous_reading)
|
||||
discriminators_state->remaining_rows_in_granule = 0;
|
||||
|
||||
/// Calculate our variant limit during discriminators deserialization.
|
||||
size_t variant_limit = 0;
|
||||
while (limit)
|
||||
{
|
||||
/// If we read all rows from current granule, start reading the next one.
|
||||
if (discriminators_state->remaining_rows_in_granule == 0)
|
||||
{
|
||||
if (stream->eof())
|
||||
return variant_limit;
|
||||
|
||||
SerializationVariant::readDiscriminatorsGranuleStart(*discriminators_state, stream);
|
||||
}
|
||||
|
||||
size_t limit_in_granule = std::min(limit, discriminators_state->remaining_rows_in_granule);
|
||||
if (discriminators_state->granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT)
|
||||
{
|
||||
auto & data = discriminators.getData();
|
||||
data.resize_fill(data.size() + limit_in_granule, discriminators_state->compact_discr);
|
||||
if (discriminators_state->compact_discr == variant_discriminator)
|
||||
variant_limit += limit_in_granule;
|
||||
}
|
||||
else
|
||||
{
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0);
|
||||
size_t start = discriminators_data.size() - limit_in_granule;
|
||||
for (size_t i = start; i != discriminators_data.size(); ++i)
|
||||
variant_limit += (discriminators_data[i] == variant_discriminator);
|
||||
}
|
||||
|
||||
discriminators_state->remaining_rows_in_granule -= limit_in_granule;
|
||||
limit -= limit_in_granule;
|
||||
}
|
||||
|
||||
return variant_limit;
|
||||
}
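A small worked example of the loop above (illustrative numbers only): suppose a continuous read asks for limit = 100 rows while remaining_rows_in_granule = 80 of a COMPACT granule whose compact_discr equals variant_discriminator. The first iteration takes limit_in_granule = min(100, 80) = 80, appends 80 discriminators and adds 80 to variant_limit; with limit now 20 and the granule exhausted, the next iteration reads the following granule header via readDiscriminatorsGranuleStart and consumes the remaining 20 rows. The returned variant_limit therefore counts only the rows of this variant inside the requested range.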
|
||||
|
||||
void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const
|
||||
{
|
||||
path.push_back(Substream::VariantElements);
|
||||
@ -214,17 +299,19 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator(
|
||||
const ColumnPtr & local_discriminators_,
|
||||
const String & variant_element_name_,
|
||||
ColumnVariant::Discriminator global_variant_discriminator_,
|
||||
ColumnVariant::Discriminator local_variant_discriminator_)
|
||||
ColumnVariant::Discriminator local_variant_discriminator_,
|
||||
bool make_nullable_)
|
||||
: local_discriminators(local_discriminators_)
|
||||
, variant_element_name(variant_element_name_)
|
||||
, global_variant_discriminator(global_variant_discriminator_)
|
||||
, local_variant_discriminator(local_variant_discriminator_)
|
||||
, make_nullable(make_nullable_)
|
||||
{
|
||||
}
|
||||
|
||||
DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const
|
||||
{
|
||||
return makeNullableOrLowCardinalityNullableSafe(prev);
|
||||
return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev;
|
||||
}
|
||||
|
||||
SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const
|
||||
@ -237,12 +324,12 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB:
|
||||
/// Case when original Variant column contained only one non-empty variant and no NULLs.
|
||||
/// In this case just use this variant.
|
||||
if (prev->size() == local_discriminators->size())
|
||||
return makeNullableOrLowCardinalityNullableSafe(prev);
|
||||
return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev;
|
||||
|
||||
/// If this variant is empty, fill result column with default values.
|
||||
if (prev->empty())
|
||||
{
|
||||
auto res = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty();
|
||||
auto res = make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty() : prev->cloneEmpty();
|
||||
res->insertManyDefaults(local_discriminators->size());
|
||||
return res;
|
||||
}
|
||||
@ -257,16 +344,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB:
|
||||
/// Now we can create new column from null-map and variant column using IColumn::expand.
|
||||
auto res_column = IColumn::mutate(prev);
|
||||
|
||||
/// Special case for LowCardinality. We want the result to be LowCardinality(Nullable),
|
||||
/// Special case for LowCardinality when we want the result to be LowCardinality(Nullable),
|
||||
/// but we don't have a good way to apply null-mask for LowCardinality(), so, we first
|
||||
/// convert our column to LowCardinality(Nullable()) and then use expand which will
|
||||
/// fill rows with 0 in mask with default value (that is NULL).
|
||||
if (prev->lowCardinality())
|
||||
if (make_nullable && prev->lowCardinality())
|
||||
res_column = assert_cast<ColumnLowCardinality &>(*res_column).cloneNullable();
|
||||
|
||||
res_column->expand(null_map, /*inverted = */ true);
|
||||
|
||||
if (res_column->canBeInsideNullable())
|
||||
if (make_nullable && prev->canBeInsideNullable())
|
||||
{
|
||||
auto null_map_col = ColumnUInt8::create();
|
||||
null_map_col->getData() = std::move(null_map);
|
||||
|
@ -9,6 +9,7 @@ namespace DB
|
||||
{
|
||||
|
||||
class SerializationVariant;
|
||||
class SerializationVariantElementNullMap;
|
||||
|
||||
/// Serialization for Variant element when we read it as a subcolumn.
|
||||
class SerializationVariantElement final : public SerializationWrapper
|
||||
@ -66,12 +67,14 @@ public:
|
||||
const String variant_element_name;
|
||||
const ColumnVariant::Discriminator global_variant_discriminator;
|
||||
const ColumnVariant::Discriminator local_variant_discriminator;
|
||||
bool make_nullable;
|
||||
|
||||
VariantSubcolumnCreator(
|
||||
const ColumnPtr & local_discriminators_,
|
||||
const String & variant_element_name_,
|
||||
ColumnVariant::Discriminator global_variant_discriminator_,
|
||||
ColumnVariant::Discriminator local_variant_discriminator_);
|
||||
ColumnVariant::Discriminator local_variant_discriminator_,
|
||||
bool make_nullable_);
|
||||
|
||||
DataTypePtr create(const DataTypePtr & prev) const override;
|
||||
ColumnPtr create(const ColumnPtr & prev) const override;
|
||||
@ -79,6 +82,18 @@ public:
|
||||
};
|
||||
private:
|
||||
friend SerializationVariant;
|
||||
friend SerializationVariantElementNullMap;
|
||||
|
||||
struct DeserializeBinaryBulkStateVariantElement;
|
||||
|
||||
static size_t deserializeCompactDiscriminators(
|
||||
ColumnPtr & discriminators_column,
|
||||
ColumnVariant::Discriminator variant_discriminator,
|
||||
size_t limit,
|
||||
ReadBuffer * stream,
|
||||
bool continuous_reading,
|
||||
DeserializeBinaryBulkStatePtr & discriminators_state_,
|
||||
const ISerialization * serialization);
|
||||
|
||||
void addVariantToPath(SubstreamPath & path) const;
|
||||
void removeVariantFromPath(SubstreamPath & path) const;
|
||||
|
@ -0,0 +1,190 @@
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/Serializations/SerializationNumber.h>
|
||||
#include <DataTypes/Serializations/SerializationVariant.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElement.h>
|
||||
#include <DataTypes/Serializations/SerializationVariantElementNullMap.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
struct DeserializeBinaryBulkStateVariantElementNullMap : public ISerialization::DeserializeBinaryBulkState
|
||||
{
|
||||
/// During deserialization discriminators streams can be shared.
|
||||
/// For example we can read several variant elements together: "select v.UInt32, v.String.null from table",
|
||||
/// or we can read the whole variant and some of variant elements or their subcolumns: "select v, v.UInt32.null from table".
|
||||
/// To read the same column from the same stream more than once we use substream cache,
|
||||
/// but this cache stores the whole column, not only the current range.
|
||||
/// During deserialization of variant elements or their subcolumns discriminators column is not stored
|
||||
/// in the result column, so we need to store them inside deserialization state, so we can use
|
||||
/// substream cache correctly.
|
||||
ColumnPtr discriminators;
|
||||
ISerialization::DeserializeBinaryBulkStatePtr discriminators_state;
|
||||
};
|
||||
|
||||
void SerializationVariantElementNullMap::enumerateStreams(
|
||||
DB::ISerialization::EnumerateStreamsSettings & settings,
|
||||
const DB::ISerialization::StreamCallback & callback,
|
||||
const DB::ISerialization::SubstreamData &) const
|
||||
{
|
||||
/// We will need stream for discriminators during deserialization.
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
callback(settings.path);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::serializeBinaryBulkStatePrefix(
|
||||
const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElementNullMap");
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElementNullMap");
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const
|
||||
{
|
||||
DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache);
|
||||
if (!discriminators_state)
|
||||
return;
|
||||
|
||||
auto variant_element_null_map_state = std::make_shared<DeserializeBinaryBulkStateVariantElementNullMap>();
|
||||
variant_element_null_map_state->discriminators_state = std::move(discriminators_state);
|
||||
state = std::move(variant_element_null_map_state);
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElementNullMap");
|
||||
}
|
||||
|
||||
void SerializationVariantElementNullMap::deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & result_column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
/// Deserialize discriminators from Variant column.
|
||||
settings.path.push_back(Substream::VariantDiscriminators);
|
||||
|
||||
DeserializeBinaryBulkStateVariantElementNullMap * variant_element_null_map_state = nullptr;
|
||||
std::optional<size_t> variant_limit;
|
||||
if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path))
|
||||
{
|
||||
variant_element_null_map_state = checkAndGetState<DeserializeBinaryBulkStateVariantElementNullMap>(state);
|
||||
variant_element_null_map_state->discriminators = cached_discriminators;
|
||||
}
|
||||
else if (auto * discriminators_stream = settings.getter(settings.path))
|
||||
{
|
||||
variant_element_null_map_state = checkAndGetState<DeserializeBinaryBulkStateVariantElementNullMap>(state);
|
||||
auto * discriminators_state = checkAndGetState<SerializationVariant::DeserializeBinaryBulkStateVariantDiscriminators>(
|
||||
variant_element_null_map_state->discriminators_state);
|
||||
|
||||
/// If we started to read a new column, reinitialize discriminators column in deserialization state.
|
||||
if (!variant_element_null_map_state->discriminators || result_column->empty())
|
||||
variant_element_null_map_state->discriminators = ColumnVariant::ColumnDiscriminators::create();
|
||||
|
||||
/// Deserialize discriminators according to serialization mode.
|
||||
if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC)
|
||||
SerializationNumber<ColumnVariant::Discriminator>().deserializeBinaryBulk(
|
||||
*variant_element_null_map_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0);
|
||||
else
|
||||
variant_limit = SerializationVariantElement::deserializeCompactDiscriminators(
|
||||
variant_element_null_map_state->discriminators,
|
||||
variant_discriminator,
|
||||
limit,
|
||||
discriminators_stream,
|
||||
settings.continuous_reading,
|
||||
variant_element_null_map_state->discriminators_state,
|
||||
this);
|
||||
|
||||
addToSubstreamsCache(cache, settings.path, variant_element_null_map_state->discriminators);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// There is no such stream or cached data, which means that there is no Variant column in this part (this can happen after ALTER TABLE ... ADD COLUMN).
/// In such cases columns are filled with default values, but for a null-map column the default value should be 1, not 0, so fill the column with 1 here instead.
|
||||
MutableColumnPtr mutable_column = result_column->assumeMutable();
|
||||
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
|
||||
data.resize_fill(data.size() + limit, 1);
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
settings.path.pop_back();
|
||||
|
||||
MutableColumnPtr mutable_column = result_column->assumeMutable();
|
||||
auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
|
||||
/// Check if there are no rows of this variant in the read range.
|
||||
if (variant_limit && *variant_limit == 0)
|
||||
{
|
||||
data.resize_fill(data.size() + limit, 1);
|
||||
}
|
||||
/// Check if there is only our variant in read range.
|
||||
else if (variant_limit && *variant_limit == limit)
|
||||
{
|
||||
data.resize_fill(data.size() + limit, 0);
|
||||
}
|
||||
/// Iterate through new discriminators to calculate the null map of our variant.
|
||||
else
|
||||
{
|
||||
const auto & discriminators_data
|
||||
= assert_cast<const ColumnVariant::ColumnDiscriminators &>(*variant_element_null_map_state->discriminators).getData();
|
||||
size_t discriminators_offset = variant_element_null_map_state->discriminators->size() - limit;
|
||||
for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i)
|
||||
data.push_back(discriminators_data[i] != variant_discriminator);
|
||||
}
|
||||
}
|
||||
|
||||
SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::VariantNullMapSubcolumnCreator(
|
||||
const ColumnPtr & local_discriminators_,
|
||||
const String & variant_element_name_,
|
||||
ColumnVariant::Discriminator global_variant_discriminator_,
|
||||
ColumnVariant::Discriminator local_variant_discriminator_)
|
||||
: local_discriminators(local_discriminators_)
|
||||
, variant_element_name(variant_element_name_)
|
||||
, global_variant_discriminator(global_variant_discriminator_)
|
||||
, local_variant_discriminator(local_variant_discriminator_)
|
||||
{
|
||||
}
|
||||
|
||||
DataTypePtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::DataTypePtr &) const
|
||||
{
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
SerializationPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::SerializationPtr &) const
|
||||
{
|
||||
return std::make_shared<SerializationVariantElementNullMap>(variant_element_name, global_variant_discriminator);
|
||||
}
|
||||
|
||||
ColumnPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::ColumnPtr &) const
|
||||
{
|
||||
/// Iterate through discriminators and create null-map for our variant.
|
||||
auto null_map_col = ColumnUInt8::create();
|
||||
auto & null_map_data = null_map_col->getData();
|
||||
null_map_data.reserve(local_discriminators->size());
|
||||
const auto & local_discriminators_data = assert_cast<const ColumnVariant::ColumnDiscriminators &>(*local_discriminators).getData();
|
||||
for (auto local_discr : local_discriminators_data)
|
||||
null_map_data.push_back(local_discr != local_variant_discriminator);
|
||||
|
||||
return null_map_col;
|
||||
}
|
||||
|
||||
|
||||
}
|
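The deserialization above derives the null map of one variant purely from the discriminators stream: if the number of rows belonging to that variant in the current block (the variant_limit above) is already known, the two extreme cases avoid the per-row scan entirely. The following standalone sketch shows the same decision in isolation; it is not part of the patch and uses illustrative names rather than the ClickHouse API.

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

using Discriminator = std::uint8_t;

/// Append null map entries for one block of `limit` rows.
/// 1 = the row stores a different variant (the subcolumn is NULL), 0 = it stores ours.
void appendNullMap(
    std::vector<std::uint8_t> & null_map,
    const std::vector<Discriminator> & discriminators, /// discriminators read for the last `limit` rows
    Discriminator our_discriminator,
    std::size_t limit,
    std::optional<std::size_t> variant_limit) /// rows of our variant in this block, if known
{
    if (variant_limit && *variant_limit == 0)
        null_map.insert(null_map.end(), limit, 1); /// our variant never occurs: every row is NULL
    else if (variant_limit && *variant_limit == limit)
        null_map.insert(null_map.end(), limit, 0); /// only our variant occurs: no row is NULL
    else
        for (auto d : discriminators)
            null_map.push_back(d != our_discriminator ? 1 : 0);
}

The two fast paths matter because a part that never (or only) contains the requested variant can fill the null map with a single resize instead of touching every discriminator.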
@ -0,0 +1,107 @@
#pragma once

#include <DataTypes/Serializations/SimpleTextSerialization.h>
#include <DataTypes/DataTypeNullable.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnVariant.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}

class SerializationVariant;
class SerializationVariantElement;

/// Serialization for Variant element null map when we read it as a subcolumn.
/// For example, variant.UInt64.null.
/// It requires separate serialization because there is no actual Nullable column
/// and we should construct null map from variant discriminators.
/// The implementation of deserializeBinaryBulk* methods is similar to SerializationVariantElement,
/// but differs in that there is no need to read the actual data of the variant, only discriminators.
class SerializationVariantElementNullMap final : public SimpleTextSerialization
{
public:
    SerializationVariantElementNullMap(const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_)
        : variant_element_name(variant_element_name_), variant_discriminator(variant_discriminator_)
    {
    }

    void enumerateStreams(
        EnumerateStreamsSettings & settings,
        const StreamCallback & callback,
        const SubstreamData & data) const override;

    void serializeBinaryBulkStatePrefix(
        const IColumn & column,
        SerializeBinaryBulkSettings & settings,
        SerializeBinaryBulkStatePtr & state) const override;

    void serializeBinaryBulkStateSuffix(
        SerializeBinaryBulkSettings & settings,
        SerializeBinaryBulkStatePtr & state) const override;

    void deserializeBinaryBulkStatePrefix(
        DeserializeBinaryBulkSettings & settings,
        DeserializeBinaryBulkStatePtr & state,
        SubstreamsDeserializeStatesCache * cache) const override;

    void serializeBinaryBulkWithMultipleStreams(
        const IColumn & column,
        size_t offset,
        size_t limit,
        SerializeBinaryBulkSettings & settings,
        SerializeBinaryBulkStatePtr & state) const override;

    void deserializeBinaryBulkWithMultipleStreams(
        ColumnPtr & column,
        size_t limit,
        DeserializeBinaryBulkSettings & settings,
        DeserializeBinaryBulkStatePtr & state,
        SubstreamsCache * cache) const override;

    void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
    void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
    void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
    void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
    void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
    void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); }
    bool tryDeserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); }

    struct VariantNullMapSubcolumnCreator : public ISubcolumnCreator
    {
        const ColumnPtr local_discriminators;
        const String variant_element_name;
        const ColumnVariant::Discriminator global_variant_discriminator;
        const ColumnVariant::Discriminator local_variant_discriminator;

        VariantNullMapSubcolumnCreator(
            const ColumnPtr & local_discriminators_,
            const String & variant_element_name_,
            ColumnVariant::Discriminator global_variant_discriminator_,
            ColumnVariant::Discriminator local_variant_discriminator_);

        DataTypePtr create(const DataTypePtr & prev) const override;
        ColumnPtr create(const ColumnPtr & prev) const override;
        SerializationPtr create(const SerializationPtr & prev) const override;
    };
private:
    [[noreturn]] static void throwNoSerialization()
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for variant element null map subcolumn");
    }

    friend SerializationVariant;
    friend SerializationVariantElement;

    /// To be able to deserialize Variant element null map as a subcolumn
    /// we need variant element type name and global discriminator.
    String variant_element_name;
    ColumnVariant::Discriminator variant_discriminator;
};

}
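The VariantNullMapSubcolumnCreator declared above produces the three pieces that describe a `variant.X.null` subcolumn: its data type (UInt8), a serialization bound to the element name and its global discriminator, and a column computed from the parent column's local discriminators. Below is a simplified, hypothetical sketch of that pattern with illustrative types only, not the ClickHouse ISubcolumnCreator interfaces.

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

using Discriminator = std::uint8_t;

/// Stand-in for the null map subcolumn serialization: it only needs
/// the element name and the *global* discriminator of the variant.
struct NullMapSerialization
{
    std::string variant_element_name;
    Discriminator global_variant_discriminator;
};

/// Stand-in for the subcolumn creator: from the parent Variant column it derives
/// the type, the serialization and the column data of the null map subcolumn.
struct NullMapSubcolumnCreator
{
    std::vector<Discriminator> local_discriminators; /// discriminators as stored in this column/part
    std::string variant_element_name;
    Discriminator global_variant_discriminator = 0;  /// position of the variant in the type
    Discriminator local_variant_discriminator = 0;   /// position of the variant in this part

    std::string createType() const { return "UInt8"; }

    std::shared_ptr<NullMapSerialization> createSerialization() const
    {
        return std::make_shared<NullMapSerialization>(
            NullMapSerialization{variant_element_name, global_variant_discriminator});
    }

    std::vector<std::uint8_t> createColumn() const
    {
        std::vector<std::uint8_t> null_map;
        null_map.reserve(local_discriminators.size());
        for (auto d : local_discriminators)
            null_map.push_back(d != local_variant_discriminator ? 1 : 0);
        return null_map;
    }
};

Keeping both discriminators around mirrors the real creator: the global one identifies the variant within the type, while the local one is what the per-part discriminators column actually stores.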
@ -27,7 +27,8 @@ DiskPtr DiskFactory::create(
    ContextPtr context,
    const DisksMap & map,
    bool attach,
    bool custom_disk) const
    bool custom_disk,
    const std::unordered_set<String> & skip_types) const
{
    const auto disk_type = config.getString(config_prefix + ".type", "local");

@ -38,6 +39,11 @@ DiskPtr DiskFactory::create(
            "DiskFactory: the disk '{}' has unknown disk type: {}", name, disk_type);
    }

    if (skip_types.contains(found->first))
    {
        return nullptr;
    }

    const auto & disk_creator = found->second;
    return disk_creator(name, config, config_prefix, context, map, attach, custom_disk);
}
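The hunk above adds a skip_types parameter to DiskFactory::create: a registered disk type listed there is no longer instantiated, and the factory returns nullptr instead of calling its creator, while an unregistered type is still an error. A minimal standalone sketch of that behaviour follows, with hypothetical names rather than the real DiskFactory interface.

#include <functional>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_set>

struct Disk { std::string name; };
using DiskPtr = std::shared_ptr<Disk>;
using DiskCreator = std::function<DiskPtr(const std::string & name)>;

/// Look up the creator for `disk_type` and call it, unless the type was asked to be skipped:
/// an unknown type is still an error, a skipped type simply yields nullptr.
DiskPtr createDisk(
    const std::map<std::string, DiskCreator> & registry,
    const std::string & name,
    const std::string & disk_type,
    const std::unordered_set<std::string> & skip_types)
{
    auto found = registry.find(disk_type);
    if (found == registry.end())
        throw std::runtime_error("unknown disk type: " + disk_type);

    if (skip_types.contains(found->first))
        return nullptr;

    return found->second(name);
}

Callers that only want to materialize a subset of configured disks can pass the remaining types in skip_types and treat a nullptr result as "intentionally not created".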