diff --git a/README.md b/README.md index dc253d4db2d..98f9108f14c 100644 --- a/README.md +++ b/README.md @@ -34,14 +34,12 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.6 Community Call](https://clickhouse.com/company/events/v24-6-community-release-call) - Jul 2 +* [v24.7 Community Call](https://clickhouse.com/company/events/v24-7-community-release-call) - Jul 30 ## Upcoming Events Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26 -* [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27 * [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 * [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9 * [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 diff --git a/base/poco/Net/include/Poco/Net/NameValueCollection.h b/base/poco/Net/include/Poco/Net/NameValueCollection.h index be499838d0e..2337535bd11 100644 --- a/base/poco/Net/include/Poco/Net/NameValueCollection.h +++ b/base/poco/Net/include/Poco/Net/NameValueCollection.h @@ -79,7 +79,7 @@ namespace Net /// Returns the value of the first name-value pair with the given name. /// If no value with the given name has been found, the defaultValue is returned. 
- const std::vector> getAll(const std::string & name) const; + std::vector getAll(const std::string & name) const; /// Returns all values of all name-value pairs with the given name. /// /// Returns an empty vector if there are no name-value pairs with the given name. diff --git a/base/poco/Net/src/NameValueCollection.cpp b/base/poco/Net/src/NameValueCollection.cpp index 783ed48cc30..e35d66d3bde 100644 --- a/base/poco/Net/src/NameValueCollection.cpp +++ b/base/poco/Net/src/NameValueCollection.cpp @@ -102,9 +102,9 @@ const std::string& NameValueCollection::get(const std::string& name, const std:: return defaultValue; } -const std::vector> NameValueCollection::getAll(const std::string& name) const +std::vector NameValueCollection::getAll(const std::string& name) const { - std::vector> values; + std::vector values; for (ConstIterator it = _map.find(name); it != _map.end(); it++) if (it->first == name) values.push_back(it->second); diff --git a/cmake/target.cmake b/cmake/target.cmake index d6c497955f6..3d0ecd032f9 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -84,5 +84,5 @@ if (CMAKE_CROSSCOMPILING) message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!") endif () - message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}") + message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILER_TARGET}") endif () diff --git a/contrib/avro b/contrib/avro index d43acc84d3d..545e7002683 160000 --- a/contrib/avro +++ b/contrib/avro @@ -1 +1 @@ -Subproject commit d43acc84d3d455b016f847d6666fbc3cd27f16a9 +Subproject commit 545e7002683cbc2198164d93088ac8e4955b4628 diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index abde20addaf..250b47b7c2c 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -125,7 +125,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/core/SDKConfig.h" @ONLY) 
aws_get_version(AWS_CRT_CPP_VERSION_MAJOR AWS_CRT_CPP_VERSION_MINOR AWS_CRT_CPP_VERSION_PATCH FULL_VERSION GIT_HASH) -configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${AWS_CRT_DIR}/include/aws/crt/Config.h" @ONLY) +configure_file("${AWS_CRT_DIR}/include/aws/crt/Config.h.in" "${CMAKE_CURRENT_BINARY_DIR}/include/aws/crt/Config.h" @ONLY) list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 6a0012c01bf..246e19593f6 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ b/contrib/libpq-cmake/CMakeLists.txt @@ -54,7 +54,6 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/port/pgstrcasecmp.c" "${LIBPQ_SOURCE_DIR}/port/thread.c" "${LIBPQ_SOURCE_DIR}/port/path.c" - "${LIBPQ_SOURCE_DIR}/port/explicit_bzero.c" ) add_library(_libpq ${SRCS}) diff --git a/contrib/orc b/contrib/orc index 947cebaf943..bcc025c0982 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f +Subproject commit bcc025c09828c556f54cfbdf83a66b9acae7d17f diff --git a/contrib/rocksdb b/contrib/rocksdb index 3a0b80ca9d6..078fa563869 160000 --- a/contrib/rocksdb +++ b/contrib/rocksdb @@ -1 +1 @@ -Subproject commit 3a0b80ca9d6eebb38fad7ea3f41dfc9db4f6a984 +Subproject commit 078fa5638690004e1f744076d1bdcc4e93767304 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index c4220ba90ac..943e1d8acbd 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -1,7 +1,7 @@ -option (ENABLE_ROCKSDB "Enable rocksdb library" ${ENABLE_LIBRARIES}) +option (ENABLE_ROCKSDB "Enable RocksDB" ${ENABLE_LIBRARIES}) if (NOT ENABLE_ROCKSDB) - message (STATUS "Not using rocksdb") + message (STATUS "Not using RocksDB") return() endif() diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 018fe57bf56..c59ef1b919a 100644 --- a/docker/keeper/Dockerfile +++ 
b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/reqgenerator.py b/docker/reqgenerator.py new file mode 100644 index 00000000000..6c1d89ac0ac --- /dev/null +++ b/docker/reqgenerator.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# To run this script you must install docker and pipdeptree Python package +# + +import subprocess +import os +import sys + + +def build_docker_deps(image_name, imagedir): + cmd = f"""docker run --entrypoint "/bin/bash" {image_name} -c "pip install pipdeptree 2>/dev/null 1>/dev/null && pipdeptree --freeze --warn silence | sed 's/ \+//g' | sort | uniq" > {imagedir}/requirements.txt""" + subprocess.check_call(cmd, shell=True) + + +def check_docker_file_install_with_pip(filepath): + image_name = None + with open(filepath, "r") as f: + for line in f: + if "docker build" in line: + arr = line.split(" ") + if len(arr) > 4: + image_name = arr[4] + if "pip3 install" in line or "pip install" in line: + return image_name, True + return image_name, False + + +def process_affected_images(images_dir): + for root, _dirs, files in os.walk(images_dir): + for f in files: + if f == "Dockerfile": + docker_file_path = os.path.join(root, f) + print("Checking image on path", docker_file_path) + image_name, has_pip = check_docker_file_install_with_pip( + docker_file_path + ) + if has_pip: + print("Find pip in", image_name) + try: + build_docker_deps(image_name, root) + except Exception as ex: + print(ex) + else: + print("Pip not found in", docker_file_path) + + +process_affected_images(sys.argv[1]) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index a86406e5129..240df79aeb1 100644 --- a/docker/server/Dockerfile.alpine +++ 
b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 25f3273a648..ac64655991a 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.6.1.4423" +ARG VERSION="24.6.2.17" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index e0be261d5e8..2512268be0f 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -19,10 +19,7 @@ RUN apt-get update \ odbcinst \ psmisc \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ unixodbc \ pv \ jq \ @@ -31,7 +28,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt # This symlink is required by gcc to find the lld linker RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld @@ -39,6 +37,10 @@ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' 
/usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake +# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot pick up the libraries from the default install path. +# This is a very dirty workaround; it would be better to build the compiler and LLVM ourselves and use them. Details: https://github.com/llvm/llvm-project/issues/95792 +RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu + ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ && cd /tmp/ccache \ diff --git a/docker/test/fasttest/requirements.txt b/docker/test/fasttest/requirements.txt new file mode 100644 index 00000000000..993ea22e5ae --- /dev/null +++ b/docker/test/fasttest/requirements.txt @@ -0,0 +1,41 @@ +Jinja2==3.1.3 +MarkupSafe==2.1.5 +PyJWT==2.3.0 +PyYAML==6.0.1 +Pygments==2.11.2 +SecretStorage==3.3.1 +blinker==1.4 +certifi==2020.6.20 +chardet==4.0.0 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +pytz==2024.1 +requests==2.32.3 +scipy==1.12.0 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 4d5159cfa9e..c015d3a3542 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -84,6 +84,8 @@ function start_server echo "ClickHouse server pid '$server_pid' started and responded" } +export -f start_server + function clone_root { [ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE" @@ -254,6 +256,19 @@ function 
configure rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } +function timeout_with_logging() { + local exit_code=0 + + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" + + if [[ "${exit_code}" -eq "124" ]] + then + echo "The command 'timeout ${*}' has been killed by timeout" + fi + + return $exit_code +} + function run_tests { clickhouse-server --version @@ -292,6 +307,8 @@ function run_tests clickhouse stop --pid-path "$FASTTEST_DATA" } +export -f run_tests + case "$stage" in "") ls -la @@ -315,7 +332,7 @@ case "$stage" in configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" ;& "run_tests") - run_tests + timeout_with_logging 35m bash -c run_tests ||: /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index d3f78ac1d95..e1fb09b8ed5 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -31,7 +31,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/fuzzer/requirements.txt b/docker/test/fuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/fuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 
+SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 270b40e23a6..469251f648c 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -33,7 +33,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install pycurl +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt && rm -rf /root/.cache/pip # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/integration/base/requirements.txt b/docker/test/integration/base/requirements.txt new file mode 100644 index 00000000000..d195d8deaf6 --- /dev/null +++ b/docker/test/integration/base/requirements.txt @@ -0,0 +1,26 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +pycurl==7.45.3 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/integration/resolver/Dockerfile b/docker/test/integration/resolver/Dockerfile index 01b9b777614..b35a7262651 100644 --- a/docker/test/integration/resolver/Dockerfile +++ b/docker/test/integration/resolver/Dockerfile @@ -2,4 +2,5 @@ # Helper docker container to run python bottle apps FROM python:3 -RUN python -m pip install bottle +COPY requirements.txt / +RUN python -m pip install --no-cache-dir -r requirements.txt diff --git a/docker/test/integration/resolver/requirements.txt b/docker/test/integration/resolver/requirements.txt new file mode 100644 index 00000000000..fbf85295329 --- /dev/null +++ 
b/docker/test/integration/resolver/requirements.txt @@ -0,0 +1,6 @@ +bottle==0.12.25 +packaging==24.1 +pip==23.2.1 +pipdeptree==2.23.0 +setuptools==69.0.3 +wheel==0.42.0 diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 23d8a37d822..d250b746e7d 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -26,7 +26,6 @@ RUN apt-get update \ libicu-dev \ bsdutils \ curl \ - python3-pika \ liblua5.1-dev \ luajit \ libssl-dev \ @@ -61,49 +60,8 @@ RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ # kazoo 2.10.0 is broken # https://s3.amazonaws.com/clickhouse-test-reports/59337/524625a1d2f4cc608a3f1059e3df2c30f353a649/integration_tests__asan__analyzer__[5_6].html -RUN python3 -m pip install --no-cache-dir \ - PyMySQL==1.1.0 \ - asyncio==3.4.3 \ - avro==1.10.2 \ - azure-storage-blob==12.19.0 \ - boto3==1.34.24 \ - cassandra-driver==3.29.0 \ - confluent-kafka==2.3.0 \ - delta-spark==2.3.0 \ - dict2xml==1.7.4 \ - dicttoxml==1.7.16 \ - docker==6.1.3 \ - docker-compose==1.29.2 \ - grpcio==1.60.0 \ - grpcio-tools==1.60.0 \ - kafka-python==2.0.2 \ - lz4==4.3.3 \ - minio==7.2.3 \ - nats-py==2.6.0 \ - protobuf==4.25.2 \ - kazoo==2.9.0 \ - psycopg2-binary==2.9.6 \ - pyhdfs==0.3.1 \ - pymongo==3.11.0 \ - pyspark==3.3.2 \ - pytest==7.4.4 \ - pytest-order==1.0.0 \ - pytest-random==0.2 \ - pytest-repeat==0.9.3 \ - pytest-timeout==2.2.0 \ - pytest-xdist==3.5.0 \ - pytest-reportlog==0.4.0 \ - pytz==2023.3.post1 \ - pyyaml==5.3.1 \ - redis==5.0.1 \ - requests-kerberos==0.14.0 \ - tzlocal==2.1 \ - retry==0.9.2 \ - bs4==0.0.2 \ - lxml==5.1.0 \ - urllib3==2.0.7 \ - jwcrypto==1.5.6 -# bs4, lxml are for cloud tests, do not delete +COPY requirements.txt / +RUN python3 -m pip install --no-cache-dir -r requirements.txt # Hudi supports only spark 3.3.*, not 3.4 RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ diff --git 
a/docker/test/integration/runner/requirements.txt b/docker/test/integration/runner/requirements.txt new file mode 100644 index 00000000000..8a77d8abf77 --- /dev/null +++ b/docker/test/integration/runner/requirements.txt @@ -0,0 +1,113 @@ +PyHDFS==0.3.1 +PyJWT==2.3.0 +PyMySQL==1.1.0 +PyNaCl==1.5.0 +PyYAML==5.3.1 +SecretStorage==3.3.1 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +async-timeout==4.0.3 +asyncio==3.4.3 +attrs==23.2.0 +avro==1.10.2 +azure-core==1.30.1 +azure-storage-blob==12.19.0 +bcrypt==4.1.3 +beautifulsoup4==4.12.3 +blinker==1.4 +boto3==1.34.24 +botocore==1.34.101 +bs4==0.0.2 +cassandra-driver==3.29.0 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +confluent-kafka==2.3.0 +cryptography==3.4.8 +dbus-python==1.2.18 +decorator==5.1.1 +delta-spark==2.3.0 +dict2xml==1.7.4 +dicttoxml==1.7.16 +distro-info==1.1+ubuntu0.2 +distro==1.7.0 +docker-compose==1.29.2 +docker==6.1.3 +dockerpty==0.4.1 +docopt==0.6.2 +exceptiongroup==1.2.1 +execnet==2.1.1 +geomet==0.2.1.post1 +grpcio-tools==1.60.0 +grpcio==1.60.0 +gssapi==1.8.3 +httplib2==0.20.2 +idna==3.7 +importlib-metadata==4.6.4 +iniconfig==2.0.0 +isodate==0.6.1 +jeepney==0.7.1 +jmespath==1.0.1 +jsonschema==3.2.0 +jwcrypto==1.5.6 +kafka-python==2.0.2 +kazoo==2.9.0 +keyring==23.5.0 +krb5==0.5.1 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==5.1.0 +lz4==4.3.3 +minio==7.2.3 +more-itertools==8.10.0 +nats-py==2.6.0 +oauthlib==3.2.0 +packaging==24.0 +paramiko==3.4.0 +pika==1.2.0 +pip==24.1.1 +pipdeptree==2.23.0 +pluggy==1.5.0 +protobuf==4.25.2 +psycopg2-binary==2.9.6 +py4j==0.10.9.5 +py==1.11.0 +pycparser==2.22 +pycryptodome==3.20.0 +pymongo==3.11.0 +pyparsing==2.4.7 +pyrsistent==0.20.0 +pyspark==3.3.2 +pyspnego==0.10.2 +pytest-order==1.0.0 +pytest-random==0.2 +pytest-repeat==0.9.3 +pytest-reportlog==0.4.0 +pytest-timeout==2.2.0 +pytest-xdist==3.5.0 +pytest==7.4.4 +python-apt==2.4.0+ubuntu3 +python-dateutil==2.9.0.post0 +python-dotenv==0.21.1 +pytz==2023.3.post1 
+redis==5.0.1 +requests-kerberos==0.14.0 +requests==2.31.0 +retry==0.9.2 +s3transfer==0.10.1 +setuptools==59.6.0 +simplejson==3.19.2 +six==1.16.0 +soupsieve==2.5 +texttable==1.7.0 +tomli==2.0.1 +typing_extensions==4.11.0 +tzlocal==2.1 +unattended-upgrades==0.1 +urllib3==2.0.7 +wadllib==1.3.6 +websocket-client==0.59.0 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile index c9802a0e44e..e6eb2ae336e 100644 --- a/docker/test/libfuzzer/Dockerfile +++ b/docker/test/libfuzzer/Dockerfile @@ -1,3 +1,4 @@ +# docker build -t clickhouse/libfuzzer . ARG FROM_TAG=latest FROM clickhouse/test-base:$FROM_TAG @@ -29,7 +30,8 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install Jinja2 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt COPY * / diff --git a/docker/test/libfuzzer/requirements.txt b/docker/test/libfuzzer/requirements.txt new file mode 100644 index 00000000000..3dce93e023b --- /dev/null +++ b/docker/test/libfuzzer/requirements.txt @@ -0,0 +1,27 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.4 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 1835900b316..c68a39f6f70 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -23,7 +23,6 @@ RUN apt-get update \ python3 \ python3-dev \ python3-pip \ - python3-setuptools \ rsync \ tree \ tzdata \ @@ -33,12 +32,14 @@ 
RUN apt-get update \ cargo \ ripgrep \ zstd \ - && pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \ && apt-get purge --yes python3-dev g++ \ && apt-get autoremove --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* +COPY requirements.txt / +RUN pip3 --no-cache-dir install -r requirements.txt + COPY run.sh / CMD ["bash", "/run.sh"] diff --git a/docker/test/performance-comparison/requirements.txt b/docker/test/performance-comparison/requirements.txt new file mode 100644 index 00000000000..932527cc022 --- /dev/null +++ b/docker/test/performance-comparison/requirements.txt @@ -0,0 +1,32 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +Pygments==2.11.2 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2023.4 +PyYAML==6.0.1 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==2.1 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 1ea1e52e6fa..1425e12cd84 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -18,11 +18,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - numpy \ - pyodbc \ - deepdiff \ - sqlglot +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.6.20200320/clickhouse-odbc-1.1.6-Linux.tar.gz" diff --git a/docker/test/sqllogic/requirements.txt b/docker/test/sqllogic/requirements.txt new file mode 100644 index 00000000000..abc0a368659 --- /dev/null +++ 
b/docker/test/sqllogic/requirements.txt @@ -0,0 +1,30 @@ +blinker==1.4 +cryptography==3.4.8 +dbus-python==1.2.18 +deepdiff==7.0.1 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +numpy==1.26.4 +oauthlib==3.2.0 +ordered-set==4.1.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyodbc==5.1.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +sqlglot==23.16.0 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile index 7f59f65761f..71d915b0c7a 100644 --- a/docker/test/sqltest/Dockerfile +++ b/docker/test/sqltest/Dockerfile @@ -14,9 +14,8 @@ RUN apt-get update --yes \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN pip3 install \ - pyyaml \ - clickhouse-driver +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt ARG sqltest_repo="https://github.com/elliotchance/sqltest/" diff --git a/docker/test/sqltest/requirements.txt b/docker/test/sqltest/requirements.txt new file mode 100644 index 00000000000..4a0ae3edbac --- /dev/null +++ b/docker/test/sqltest/requirements.txt @@ -0,0 +1,29 @@ +blinker==1.4 +clickhouse-driver==0.2.7 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +httplib2==0.20.2 +importlib-metadata==4.6.4 +jeepney==0.7.1 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +more-itertools==8.10.0 +oauthlib==3.2.0 +packaging==24.1 +pip==24.1.1 +pipdeptree==2.23.0 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 +pytz==2024.1 +PyYAML==6.0.1 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +tzlocal==5.2 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 355e70f180e..0daf88cad7e 100644 --- 
a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -6,7 +6,6 @@ FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ - python3-requests \ nodejs \ npm \ && apt-get clean \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index c3d80a7334b..5a655a3fd2b 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -25,10 +25,7 @@ RUN apt-get update -y \ openssl \ postgresql-client \ python3 \ - python3-lxml \ python3-pip \ - python3-requests \ - python3-termcolor \ qemu-user-static \ sqlite3 \ sudo \ @@ -51,7 +48,8 @@ RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PR && unzip protoc-${PROTOC_VERSION}-linux-x86_64.zip -d /usr/local \ && rm protoc-${PROTOC_VERSION}-linux-x86_64.zip -RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 pyarrow==15.0.0 +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && cd /tmp/clickhouse-odbc-tmp \ diff --git a/docker/test/stateless/requirements.txt b/docker/test/stateless/requirements.txt new file mode 100644 index 00000000000..3284107e24e --- /dev/null +++ b/docker/test/stateless/requirements.txt @@ -0,0 +1,51 @@ +awscli==1.22.34 +blinker==1.4 +botocore==1.23.34 +certifi==2020.6.20 +chardet==4.0.0 +colorama==0.4.4 +cryptography==3.4.8 +dbus-python==1.2.18 +distro==1.7.0 +docutils==0.17.1 +gyp==0.1 +httplib2==0.20.2 +idna==3.3 +importlib-metadata==4.6.4 +jeepney==0.7.1 +Jinja2==3.1.3 +jmespath==0.10.0 +keyring==23.5.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.8.0 +MarkupSafe==2.1.5 +more-itertools==8.10.0 +numpy==1.26.3 +oauthlib==3.2.0 +packaging==24.1 +pandas==1.5.3 +pip==24.1.1 +pipdeptree==2.23.0 +pyarrow==15.0.0 +pyasn1==0.4.8 +PyJWT==2.3.0 +pyparsing==2.4.7 +python-apt==2.4.0+ubuntu3 
+python-dateutil==2.8.1 +pytz==2024.1 +PyYAML==6.0.1 +requests==2.32.3 +roman==3.3 +rsa==4.8 +s3transfer==0.5.0 +scipy==1.12.0 +SecretStorage==3.3.1 +setuptools==59.6.0 +six==1.16.0 +termcolor==1.1.0 +urllib3==1.26.5 +wadllib==1.3.6 +wheel==0.37.1 +zipp==1.0.0 diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 5ece9743498..43d3c698d8a 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -6,19 +6,21 @@ source /setup_export_logs.sh # fail on errors, verbose and export all env variables set -e -x -a +MAX_RUN_TIME=${MAX_RUN_TIME:-10800} +MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME)) + # Choose random timezone for this test run. # # NOTE: that clickhouse-test will randomize session_timezone by itself as well # (it will choose between default server timezone and something specific). TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" -echo "Choosen random timezone $TZ" +echo "Chosen random timezone $TZ" ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb -# Accept failure in the next two commands until 24.4 is released (for compatibility and Bugfix validation run) -dpkg -i package_folder/clickhouse-odbc-bridge_*.deb || true -dpkg -i package_folder/clickhouse-library-bridge_*.deb || true +dpkg -i package_folder/clickhouse-odbc-bridge_*.deb +dpkg -i package_folder/clickhouse-library-bridge_*.deb dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb @@ -55,12 +57,6 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml - #todo: remove these after 24.3 released. 
- sudo sed -i "s|azure<|azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml - - #todo: remove these after 24.3 released. - sudo sed -i "s|local<|local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml - function remove_keeper_config() { sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml @@ -262,14 +258,17 @@ function run_tests() export -f run_tests + +# This should be enough to setup job and collect artifacts +TIMEOUT=$((MAX_RUN_TIME - 300)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. - timeout_with_logging "$MAX_RUN_TIME" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ | sed 's/All tests have finished//' | sed 's/No tests were run//' ||: fi -timeout_with_logging "$MAX_RUN_TIME" bash -c run_tests ||: +timeout_with_logging "$TIMEOUT" bash -c run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index 9b6ab535a90..833e1a05384 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -38,7 +38,7 @@ function fn_exists() { function timeout_with_logging() { local exit_code=0 - timeout "${@}" || exit_code="${?}" + timeout -s TERM --preserve-status "${@}" || exit_code="${?}" if [[ "${exit_code}" -eq "124" ]] then diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 7cd712b73f6..cdc1d1fa095 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -23,22 +23,8 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* # python-magic is the same version as in Ubuntu 22.04 -RUN pip3 install \ - PyGithub \ - black==23.12.0 \ - boto3 \ - 
codespell==2.2.1 \ - mypy==1.8.0 \ - pylint==3.1.0 \ - python-magic==0.4.24 \ - flake8==4.0.1 \ - requests \ - thefuzz \ - tqdm==4.66.4 \ - types-requests \ - unidiff \ - jwt \ - && rm -rf /root/.cache/pip +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r requirements.txt RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 ENV LC_ALL en_US.UTF-8 diff --git a/docker/test/style/requirements.txt b/docker/test/style/requirements.txt new file mode 100644 index 00000000000..bb0cd55dd1a --- /dev/null +++ b/docker/test/style/requirements.txt @@ -0,0 +1,58 @@ +aiohttp==3.9.5 +aiosignal==1.3.1 +astroid==3.1.0 +async-timeout==4.0.3 +attrs==23.2.0 +black==23.12.0 +boto3==1.34.131 +botocore==1.34.131 +certifi==2024.6.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +codespell==2.2.1 +cryptography==42.0.8 +Deprecated==1.2.14 +dill==0.3.8 +flake8==4.0.1 +frozenlist==1.4.1 +idna==3.7 +isort==5.13.2 +jmespath==1.0.1 +jwt==1.3.1 +mccabe==0.6.1 +multidict==6.0.5 +mypy==1.8.0 +mypy-extensions==1.0.0 +packaging==24.1 +pathspec==0.9.0 +pip==24.1.1 +pipdeptree==2.23.0 +platformdirs==4.2.2 +pycodestyle==2.8.0 +pycparser==2.22 +pyflakes==2.4.0 +PyGithub==2.3.0 +PyJWT==2.8.0 +pylint==3.1.0 +PyNaCl==1.5.0 +python-dateutil==2.9.0.post0 +python-magic==0.4.24 +PyYAML==6.0.1 +rapidfuzz==3.9.3 +requests==2.32.3 +s3transfer==0.10.1 +setuptools==59.6.0 +six==1.16.0 +thefuzz==0.22.1 +tomli==2.0.1 +tomlkit==0.12.5 +tqdm==4.66.4 +types-requests==2.32.0.20240622 +typing_extensions==4.12.2 +unidiff==0.7.5 +urllib3==2.2.2 +wheel==0.37.1 +wrapt==1.16.0 +yamllint==1.26.3 +yarl==1.9.4 diff --git a/docs/changelogs/v24.6.2.17-stable.md b/docs/changelogs/v24.6.2.17-stable.md new file mode 100644 index 00000000000..820937f6291 --- /dev/null +++ b/docs/changelogs/v24.6.2.17-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.2.17-stable (5710a8b5c0c) FIXME as compared to v24.6.1.4423-stable 
(dcced7c8478) + +#### New Feature +* Backported in [#66002](https://github.com/ClickHouse/ClickHouse/issues/66002): Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Improvement +* Backported in [#65898](https://github.com/ClickHouse/ClickHouse/issues/65898): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#65935](https://github.com/ClickHouse/ClickHouse/issues/65935): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65907](https://github.com/ClickHouse/ClickHouse/issues/65907): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#65962](https://github.com/ClickHouse/ClickHouse/issues/65962): Add missing workload identity changes. [#65848](https://github.com/ClickHouse/ClickHouse/pull/65848) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Backported in [#66033](https://github.com/ClickHouse/ClickHouse/issues/66033): Follow up to [#65046](https://github.com/ClickHouse/ClickHouse/issues/65046). [#65928](https://github.com/ClickHouse/ClickHouse/pull/65928) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66076](https://github.com/ClickHouse/ClickHouse/issues/66076): Fix support of non-const scale arguments in rounding functions. 
[#65983](https://github.com/ClickHouse/ClickHouse/pull/65983) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Backported in [#66017](https://github.com/ClickHouse/ClickHouse/issues/66017): Fix race in s3queue. [#65986](https://github.com/ClickHouse/ClickHouse/pull/65986) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 9879ee35612..22c8c704ba2 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -974,6 +974,13 @@ Default value: false - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting +## use_compact_variant_discriminators_serialization {#use_compact_variant_discriminators_serialization} + +Enables compact mode for binary serialization of discriminators in Variant data type. +This mode allows to use significantly less memory for storing discriminators in parts when there is mostly one variant or a lot of NULL values. + +Default value: true + ## merge_workload Used to regulate how resources are utilized and shared between merges and other workloads. Specified value is used as `workload` setting value for background merges of this table. If not specified (empty string), then server setting `merge_workload` is used instead. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1d74a63b972..ed0b29aa851 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1170,6 +1170,10 @@ Data in the VALUES clause of INSERT queries is processed by a separate stream pa Default value: 262144 (= 256 KiB). 
+:::note +`max_query_size` cannot be set within an SQL query (e.g., `SELECT now() SETTINGS max_query_size=10000`) because ClickHouse needs to allocate a buffer to parse the query, and this buffer size is determined by the `max_query_size` setting, which must be configured before the query is executed. +::: + ## max_parser_depth {#max_parser_depth} Limits maximum recursion depth in the recursive descent parser. Allows controlling the stack size. diff --git a/docs/en/operations/startup-scripts.md b/docs/en/operations/startup-scripts.md new file mode 100644 index 00000000000..91aa4772bcf --- /dev/null +++ b/docs/en/operations/startup-scripts.md @@ -0,0 +1,30 @@ +--- +slug: /en/operations/startup-scripts +sidebar_label: Startup Scripts +--- + +# Startup Scripts + +ClickHouse can run arbitrary SQL queries from the server configuration during startup. This can be useful for migrations or automatic schema creation. + +```xml +<clickhouse> + <startup_scripts> + <scripts> + <query>CREATE ROLE OR REPLACE test_role</query> + </scripts> + <scripts> + <query>CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog</query> + <condition>SELECT 1;</condition> + </scripts> + </startup_scripts> +</clickhouse> +``` + +ClickHouse executes all queries from the `startup_scripts` sequentially in the specified order. If any of the queries fail, the execution of the following queries won't be interrupted. + +You can specify a conditional query in the config. In that case, the corresponding query executes only when the condition query returns the value `1` or `true`. + +:::note +If the condition query returns any other value than `1` or `true`, the result will be interpreted as `false`, and the corresponding query won't be executed. 
+::: diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 83ce817b7db..f253b164e2a 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -357,7 +357,7 @@ Number of currently running inserts to Kafka Number of alive connections -### KeeperOutstandingRequets +### KeeperOutstandingRequests Number of outstanding requests diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md index 76db9e41836..e22bc06b641 100644 --- a/docs/en/operations/utilities/clickhouse-disks.md +++ b/docs/en/operations/utilities/clickhouse-disks.md @@ -4,35 +4,56 @@ sidebar_position: 59 sidebar_label: clickhouse-disks --- -# clickhouse-disks +# Clickhouse-disks -A utility providing filesystem-like operations for ClickHouse disks. +A utility providing filesystem-like operations for ClickHouse disks. It can work in both interactive and non-interactive modes. -Program-wide options: +## Program-wide options * `--config-file, -C` -- path to ClickHouse config, defaults to `/etc/clickhouse-server/config.xml`. * `--save-logs` -- Log progress of invoked commands to `/var/log/clickhouse-server/clickhouse-disks.log`. * `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`. * `--disk` -- what disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`. +* `--query, -q` -- single query that can be executed without launching interactive mode +* `--help, -h` -- print all the options and commands with description + +## Default Disks +After the launch two disks are initialized. The first one is a disk `local` that is supposed to imitate local file system from which clickhouse-disks utility was launched. 
The second one is a disk `default` that is mounted to the local filesystem in the directory that can be found in config as a parameter `clickhouse/path` (default value is `/var/lib/clickhouse`). + +## Clickhouse-disks state +For each disk that was added the utility stores current directory (as in a usual filesystem). User can change current directory and switch between disks. + +State is reflected in a prompt "`disk_name`:`path_name`" ## Commands -* `copy [--disk-from d1] [--disk-to d2] `. - Recursively copy data from `FROM_PATH` at disk `d1` (defaults to `disk` value if not provided) - to `TO_PATH` at disk `d2` (defaults to `disk` value if not provided). -* `move `. - Move file or directory from `FROM_PATH` to `TO_PATH`. -* `remove `. - Remove `PATH` recursively. -* `link `. - Create a hardlink from `FROM_PATH` to `TO_PATH`. -* `list [--recursive] ...` - List files at `PATH`s. Non-recursive by default. -* `list-disks`. +In this documentation file all mandatory positional arguments are referred to as `<parameter>`, named arguments are referred to as `[--parameter value]`. All positional parameters could be mentioned as a named parameter with a corresponding name. + +* `cd (change-dir, change_dir) [--disk disk] ` + Change directory to path `path` on disk `disk` (default value is a current disk). No disk switching happens. +* `copy (cp) [--disk-from disk_1] [--disk-to disk_2] `. + Recursively copy data from `path-from` at disk `disk_1` (default value is a current disk (parameter `disk` in a non-interactive mode)) + to `path-to` at disk `disk_2` (default value is a current disk (parameter `disk` in a non-interactive mode)). +* `current_disk_with_path (current, current_disk, current_path)` + Print current state in format: + `Disk: "current_disk" Path: "current path on current disk"` +* `help []` + Print help message about command `command`. If `command` is not specified print information about all commands. +* `move (mv) `. 
+ Move file or directory from `path-from` to `path-to` within current disk. +* `remove (rm, delete) `. + Remove `path` recursively on a current disk. +* `link (ln) `. + Create a hardlink from `path-from` to `path-to` on a current disk. +* `list (ls) [--recursive] ` + List files at `path`s on a current disk. Non-recursive by default. +* `list-disks (list_disks, ls-disks, ls_disks)`. List disks names. -* `mkdir [--recursive] `. +* `mkdir [--recursive] ` on a current disk. Create a directory. Non-recursive by default. -* `read: []` - Read a file from `FROM_PATH` to `TO_PATH` (`stdout` if not supplied). -* `write [FROM_PATH] `. - Write a file from `FROM_PATH` (`stdin` if not supplied) to `TO_PATH`. +* `read (r) [--path-to path]` + Read a file from `path-from` to `path` (`stdout` if not supplied). +* `switch-disk [--path path] ` + Switch to disk `disk` on path `path` (if `path` is not specified default value is a previous path on disk `disk`). +* `write (w) [--path-from path] `. + Write a file from `path` (`stdin` if `path` is not supplied, input must finish by Ctrl+D) to `path-to`. diff --git a/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md new file mode 100644 index 00000000000..fdbfd5b9e41 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/aggthrow.md @@ -0,0 +1,37 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/aggthrow +sidebar_position: 101 +--- + +# aggThrow + +This function can be used for the purpose of testing exception safety. It will throw an exception on creation with the specified probability. + +**Syntax** + +```sql +aggThrow(throw_prob) +``` + +**Arguments** + +- `throw_prob` — Probability to throw on creation. [Float64](../../data-types/float.md). + +**Returned value** + +- An exception: `Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully`. 
+ +**Example** + +Query: + +```sql +SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even; +``` + +Result: + +```response +Received exception: +Code: 503. DB::Exception: Aggregate function aggThrow has thrown exception successfully: While executing AggregatingTransform. (AGGREGATE_FUNCTION_THROW) +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index e3725b6a430..b0e5582bd87 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -43,6 +43,7 @@ Standard aggregate functions: ClickHouse-specific aggregate functions: +- [aggThrow](../reference/aggthrow.md) - [analysisOfVariance](../reference/analysis_of_variance.md) - [any](../reference/any_respect_nulls.md) - [anyHeavy](../reference/anyheavy.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md index c9c6913249c..73075c0823d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md @@ -5,23 +5,45 @@ sidebar_position: 165 # maxMap -Syntax: `maxMap(key, value)` or `maxMap(Tuple(key, value))` - Calculates the maximum from `value` array according to the keys specified in the `key` array. -Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values. +**Syntax** -The number of elements in `key` and `value` must be the same for each row that is totaled. +```sql +maxMap(key, value) +``` +or +```sql +maxMap(Tuple(key, value)) +``` -Returns a tuple of two arrays: keys and values calculated for the corresponding keys. +Alias: `maxMappedArrays` -Example: +:::note +- Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values. 
+- The number of elements in `key` and `value` must be the same for each row that is totaled. +::: + +**Parameters** + +- `key` — Array of keys. [Array](../../data-types/array.md). +- `value` — Array of values. [Array](../../data-types/array.md). + +**Returned value** + +- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)). + +**Example** + +Query: ``` sql SELECT maxMap(a, b) FROM values('a Array(Char), b Array(Int64)', (['x', 'y'], [2, 2]), (['y', 'z'], [3, 1])) ``` +Result: + ``` text ┌─maxMap(a, b)───────────┐ │ [['x','y','z'],[2,3,1]]│ diff --git a/docs/en/sql-reference/aggregate-functions/reference/minmap.md b/docs/en/sql-reference/aggregate-functions/reference/minmap.md index b1fbb9e49f3..c0f340b3f3f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/minmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/minmap.md @@ -5,23 +5,45 @@ sidebar_position: 169 # minMap -Syntax: `minMap(key, value)` or `minMap(Tuple(key, value))` - Calculates the minimum from `value` array according to the keys specified in the `key` array. -Passing a tuple of keys and value ​​arrays is identical to passing two arrays of keys and values. +**Syntax** -The number of elements in `key` and `value` must be the same for each row that is totaled. +```sql +minMap(key, value) +``` +or +```sql +minMap(Tuple(key, value)) +``` -Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. +Alias: `minMappedArrays` -Example: +:::note +- Passing a tuple of keys and value arrays is identical to passing an array of keys and an array of values. +- The number of elements in `key` and `value` must be the same for each row that is totaled. +::: + +**Parameters** + +- `key` — Array of keys. [Array](../../data-types/array.md). +- `value` — Array of values. 
[Array](../../data-types/array.md). + +**Returned value** + +- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)). + +**Example** + +Query: ``` sql SELECT minMap(a, b) FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1])) ``` +Result: + ``` text ┌─minMap(a, b)──────┐ │ ([1,2,3],[2,1,1]) │ diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index b532e0de8f0..46b1167fa33 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -83,7 +83,57 @@ Result: ``` ## makeDate32 -Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md). +Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from a year, month, day (or optionally a year and a day). + +**Syntax** + +```sql +makeDate32(year, [month,] day) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (optional). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +:::note +If `month` is omitted then `day` should take a value between `1` and `365`, otherwise it should take a value between `1` and `31`. +::: + +**Returned values** + +- A date created from the arguments. [Date32](../../sql-reference/data-types/date32.md). 
+ +**Examples** + +Create a date from a year, month, and day: + +Query: + +```sql +SELECT makeDate32(2024, 1, 1); +``` + +Result: + +```response +2024-01-01 +``` + +Create a Date from a year and day of year: + +Query: + +``` sql +SELECT makeDate32(2024, 100); +``` + +Result: + +```response +2024-04-09 +``` ## makeDateTime @@ -125,12 +175,38 @@ Result: ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). +Creates a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components: year, month, day, hour, minute, second. With optional sub-second precision. **Syntax** +```sql +makeDateTime64(year, month, day, hour, minute, second[, precision]) +``` + +**Arguments** + +- `year` — Year (0-9999). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month (1-12). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day (1-31). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `hour` — Hour (0-23). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `minute` — Minute (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `second` — Second (0-59). [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `precision` — Optional precision of the sub-second component (0-9). 
[Integer](../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- A date and time created from the supplied arguments. [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + ``` sql -makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) +SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5); +``` + +```response +┌─makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5)─┐ +│ 2023-05-15 10:30:45.00779 │ +└─────────────────────────────────────────────────┘ ``` ## timestamp diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 4e252785715..8f9fd9abb0d 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server. fqdn(); ``` -This function is case-insensitive. +Aliases: `fullHostName`, `FQDN`. **Returned value** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 894b9026165..e2f1b8c7f14 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -12,9 +12,7 @@ Functions for [searching](string-search-functions.md) in strings and for [replac ## empty -Checks whether the input string is empty. - -A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. +Checks whether the input string is empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. The function is also available for [arrays](array-functions.md#function-empty) and [UUIDs](uuid-functions.md#empty). @@ -48,9 +46,7 @@ Result: ## notEmpty -Checks whether the input string is non-empty. 
- -A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. +Checks whether the input string is non-empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. The function is also available for [arrays](array-functions.md#function-notempty) and [UUIDs](uuid-functions.md#notempty). @@ -96,7 +92,7 @@ length(s) **Parameters** -- `s`: An input string or array. [String](../data-types/string)/[Array](../data-types/array). +- `s` — An input string or array. [String](../data-types/string)/[Array](../data-types/array). **Returned value** @@ -149,7 +145,7 @@ lengthUTF8(s) **Parameters** -- `s`: String containing valid UTF-8 encoded text. [String](../data-types/string). +- `s` — String containing valid UTF-8 encoded text. [String](../data-types/string). **Returned value** @@ -183,8 +179,8 @@ left(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -230,8 +226,8 @@ leftUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). 
**Returned value** @@ -347,8 +343,8 @@ right(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -394,8 +390,8 @@ rightUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). +- `s` — The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `offset` — The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -547,7 +543,7 @@ Alias: `ucase` **Parameters** -- `input`: A string type [String](../data-types/string.md). +- `input` — A string type [String](../data-types/string.md). **Returned value** @@ -571,16 +567,47 @@ SELECT upper('clickhouse'); Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +:::note +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point. +::: -If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. 
+**Syntax** + +```sql +lowerUTF8(input) +``` + +**Parameters** + +- `input` — A string type [String](../data-types/string.md). + +**Returned value** + +- A [String](../data-types/string.md) data type value. + +**Example** + +Query: + +``` sql +SELECT lowerUTF8('MÜNCHEN') as Lowerutf8; +``` + +Result: + +``` response +┌─Lowerutf8─┐ +│ münchen │ +└───────────┘ +``` ## upperUTF8 Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. - -If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. +:::note +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point. +::: **Syntax** @@ -590,7 +617,7 @@ upperUTF8(input) **Parameters** -- `input`: A string type [String](../data-types/string.md). +- `input` — A string type [String](../data-types/string.md). **Returned value** @@ -604,6 +631,8 @@ Query: SELECT upperUTF8('München') as Upperutf8; ``` +Result: + ``` response ┌─Upperutf8─┐ │ MÜNCHEN │ @@ -614,6 +643,34 @@ SELECT upperUTF8('München') as Upperutf8; Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0. +**Syntax** + +``` sql +isValidUTF8(input) +``` + +**Parameters** + +- `input` — A string type [String](../data-types/string.md). + +**Returned value** + +- Returns `1`, if the set of bytes constitutes valid UTF-8-encoded text, otherwise `0`. + +Query: + +``` sql +SELECT isValidUTF8('\xc3\xb1') AS valid, isValidUTF8('\xc3\x28') AS invalid; +``` + +Result: + +``` response +┌─valid─┬─invalid─┐ +│ 1 │ 0 │ +└───────┴─────────┘ +``` + ## toValidUTF8 Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. 
All running in a row invalid characters are collapsed into the one replacement character. @@ -883,7 +940,7 @@ Returns the substring of a string `s` which starts at the specified byte index ` substring(s, offset[, length]) ``` -Alias: +Aliases: - `substr` - `mid` - `byteSlice` @@ -926,9 +983,9 @@ substringUTF8(s, offset[, length]) **Arguments** -- `s`: The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) -- `offset`: The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). -- `length`: The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. +- `s` — The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset` — The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length` — The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** @@ -964,9 +1021,9 @@ Alias: `SUBSTRING_INDEX` **Arguments** -- s: The string to extract substring from. [String](../data-types/string.md). -- delim: The character to split. [String](../data-types/string.md). -- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) +- s — The string to extract substring from. [String](../data-types/string.md). +- delim — The character to split. [String](../data-types/string.md). +- count — The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. 
If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Example** @@ -995,9 +1052,9 @@ substringIndexUTF8(s, delim, count) **Arguments** -- `s`: The string to extract substring from. [String](../data-types/string.md). -- `delim`: The character to split. [String](../data-types/string.md). -- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) +- `s` — The string to extract substring from. [String](../data-types/string.md). +- `delim` — The character to split. [String](../data-types/string.md). +- `count` — The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Returned value** @@ -1277,7 +1334,7 @@ tryBase64Decode(encoded) **Arguments** -- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string. +- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string. **Returned value** @@ -1309,7 +1366,7 @@ tryBase64URLDecode(encodedUrl) **Parameters** -- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. +- `encodedURL` — [String](../data-types/string.md) column or constant. 
If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. **Returned value** @@ -1555,7 +1612,7 @@ The result type is UInt64. ## normalizeQuery -Replaces literals, sequences of literals and complex aliases with placeholders. +Replaces literals, sequences of literals and complex aliases (containing whitespace, more than two digits or at least 36 bytes long such as UUIDs) with placeholder `?`. **Syntax** @@ -1573,6 +1630,8 @@ normalizeQuery(x) **Example** +Query: + ``` sql SELECT normalizeQuery('[1, 2, 3, x]') AS query; ``` @@ -1585,9 +1644,44 @@ Result: └──────────┘ ``` +## normalizeQueryKeepNames + +Replaces literals, sequences of literals with placeholder `?` but does not replace complex aliases (containing whitespace, more than two digits +or at least 36 bytes long such as UUIDs). This helps better analyze complex query logs. + +**Syntax** + +``` sql +normalizeQueryKeepNames(x) +``` + +**Arguments** + +- `x` — Sequence of characters. [String](../data-types/string.md). + +**Returned value** + +- Sequence of characters with placeholders. [String](../data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT normalizeQuery('SELECT 1 AS aComplexName123'), normalizeQueryKeepNames('SELECT 1 AS aComplexName123'); +``` + +Result: + +```result +┌─normalizeQuery('SELECT 1 AS aComplexName123')─┬─normalizeQueryKeepNames('SELECT 1 AS aComplexName123')─┐ +│ SELECT ? AS `?` │ SELECT ? AS aComplexName123 │ +└───────────────────────────────────────────────┴────────────────────────────────────────────────────────┘ +``` + ## normalizedQueryHash -Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query log. +Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query logs. 
**Syntax** @@ -1605,6 +1699,8 @@ normalizedQueryHash(x) **Example** +Query: + ``` sql SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res; ``` @@ -1617,6 +1713,43 @@ Result: └─────┘ ``` +## normalizedQueryHashKeepNames + +Like [normalizedQueryHash](#normalizedqueryhash) it returns identical 64bit hash values without the values of literals for similar queries but it does not replace complex aliases (containing whitespace, more than two digits +or at least 36 bytes long such as UUIDs) with a placeholder before hashing. Can be helpful to analyze query logs. + +**Syntax** + +``` sql +normalizedQueryHashKeepNames(x) +``` + +**Arguments** + +- `x` — Sequence of characters. [String](../data-types/string.md). + +**Returned value** + +- Hash value. [UInt64](../data-types/int-uint.md#uint-ranges). + +**Example** + +``` sql +SELECT normalizedQueryHash('SELECT 1 AS `xyz123`') != normalizedQueryHash('SELECT 1 AS `abc123`') AS normalizedQueryHash; +SELECT normalizedQueryHashKeepNames('SELECT 1 AS `xyz123`') != normalizedQueryHashKeepNames('SELECT 1 AS `abc123`') AS normalizedQueryHashKeepNames; +``` + +Result: + +```result +┌─normalizedQueryHash─┐ +│ 0 │ +└─────────────────────┘ +┌─normalizedQueryHashKeepNames─┐ +│ 1 │ +└──────────────────────────────┘ +``` + ## normalizeUTF8NFC Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. @@ -1935,7 +2068,7 @@ soundex(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -1968,7 +2101,7 @@ punycodeEncode(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -2001,7 +2134,7 @@ punycodeEncode(val) **Arguments** -- `val` - Punycode-encoded string. 
[String](../data-types/string.md) +- `val` — Punycode-encoded string. [String](../data-types/string.md) **Returned value** @@ -2027,7 +2160,7 @@ Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded s ## idnaEncode -Returns the the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. +Returns the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. The input string must be UTF-encoded and translatable to an ASCII string, otherwise an exception is thrown. Note: No percent decoding or trimming of tabs, spaces or control characters is performed. @@ -2039,7 +2172,7 @@ idnaEncode(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -2065,7 +2198,7 @@ Like `idnaEncode` but returns an empty string in case of an error instead of thr ## idnaDecode -Returns the the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. +Returns the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism. In case of an error (e.g. because the input is invalid), the input string is returned. 
Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization. @@ -2077,7 +2210,7 @@ idnaDecode(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. [String](../data-types/string.md) **Returned value** @@ -2121,7 +2254,7 @@ Result: └───────────────────────────────────────────┘ ``` -Alias: mismatches +Alias: `mismatches` ## stringJaccardIndex @@ -2175,7 +2308,7 @@ Result: └─────────────────────────────────────┘ ``` -Alias: levenshteinDistance +Alias: `levenshteinDistance` ## editDistanceUTF8 @@ -2201,7 +2334,7 @@ Result: └─────────────────────────────────────┘ ``` -Alias: levenshteinDistanceUTF8 +Alias: `levenshteinDistanceUTF8` ## damerauLevenshteinDistance @@ -2279,13 +2412,93 @@ Result: Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. +:::note +Because `initCap` converts only the first letter of each word to upper case you may observe unexpected behaviour for words containing apostrophes or capital letters. For example: + +```sql +SELECT initCap('mother''s daughter'), initCap('joe McAdam'); +``` + +will return + +```response +┌─initCap('mother\'s daughter')─┬─initCap('joe McAdam')─┐ +│ Mother'S Daughter │ Joe Mcadam │ +└───────────────────────────────┴───────────────────────┘ +``` + +This is a known behaviour, with no plans currently to fix it. +::: + +**Syntax** + +```sql +initcap(val) +``` + +**Arguments** + +- `val` — Input value. [String](../data-types/string.md). + +**Returned value** + +- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md). 
+ +**Example** + +Query: + +```sql +SELECT initcap('building for fast'); +``` + +Result: + +```text +┌─initcap('building for fast')─┐ +│ Building For Fast │ +└──────────────────────────────┘ +``` + ## initcapUTF8 -Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. - -Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +Like [initcap](#initcap), `initcapUTF8` converts the first letter of each word to upper case and the rest to lower case. Assumes that the string contains valid UTF-8 encoded text. +If this assumption is violated, no exception is thrown and the result is undefined. +:::note +This function does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. +::: + +**Syntax** + +```sql +initcapUTF8(val) +``` + +**Arguments** + +- `val` — Input value. [String](../data-types/string.md). + +**Returned value** + +- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT initcapUTF8('не тормозит'); +``` + +Result: + +```text +┌─initcapUTF8('не тормозит')─┐ +│ Не Тормозит │ +└────────────────────────────┘ +``` ## firstLine @@ -2299,7 +2512,7 @@ firstLine(val) **Arguments** -- `val` - Input value. [String](../data-types/string.md) +- `val` — Input value. 
[String](../data-types/string.md) **Returned value** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 7aeb1f5b2a7..8793ebdd1a3 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -34,7 +34,7 @@ Alias: `replace`. Replaces the first occurrence of the substring matching the regular expression `pattern` (in [re2 syntax](https://github.com/google/re2/wiki/Syntax)) in `haystack` by the `replacement` string. -`replacement` can containing substitutions `\0-\9`. +`replacement` can contain substitutions `\0-\9`. Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match. To use a verbatim `\` character in the `pattern` or `replacement` strings, escape it using `\`. diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index 2cec1987c20..5169d4487ec 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -6,44 +6,122 @@ sidebar_label: Time Window # Time Window Functions -Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below: +Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with [WindowView](../statements/create/view.md/#window-view-experimental) are listed below: ## tumble A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`). +**Syntax** + ``` sql tumble(time_attr, interval [, timezone]) ``` **Arguments** -- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. 
-- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)). **Example** Query: ``` sql -SELECT tumble(now(), toIntervalDay('1')) +SELECT tumble(now(), toIntervalDay('1')); ``` Result: ``` text ┌─tumble(now(), toIntervalDay('1'))─────────────┐ -│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +│ ('2024-07-04 00:00:00','2024-07-05 00:00:00') │ └───────────────────────────────────────────────┘ ``` +## tumbleStart + +Returns the inclusive lower bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleStart(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). 
+ +**Example** + +Query: + +```sql +SELECT tumbleStart(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleStart(now(), toIntervalDay('1'))─┐ +│ 2024-07-04 00:00:00 │ +└────────────────────────────────────────┘ +``` + +## tumbleEnd + +Returns the exclusive upper bound of the corresponding [tumbling window](#tumble). + +**Syntax** + +``` sql +tumbleEnd(time_attr, interval [, timezone]); +``` + +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `interval` — Window interval in [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The exclusive upper bound of the corresponding tumbling window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT tumbleEnd(now(), toIntervalDay('1')); +``` + +Result: + +```response +┌─tumbleEnd(now(), toIntervalDay('1'))─┐ +│ 2024-07-05 00:00:00 │ +└──────────────────────────────────────┘ +``` + ## hop -A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. +A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. ``` sql hop(time_attr, hop_interval, window_interval [, timezone]) @@ -51,65 +129,118 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Arguments** -- `time_attr` - Date and time. 
[DateTime](../data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. +- The inclusive lower and exclusive upper bound of the corresponding hopping window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md)). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. 
+::: **Example** Query: ``` sql -SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +SELECT hop(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); ``` Result: ``` text -┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ -│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ -└───────────────────────────────────────────────────────────┘ -``` - -## tumbleStart - -Returns the inclusive lower bound of the corresponding tumbling window. - -``` sql -tumbleStart(bounds_tuple); -tumbleStart(time_attr, interval [, timezone]); -``` - -## tumbleEnd - -Returns the exclusive upper bound of the corresponding tumbling window. - -``` sql -tumbleEnd(bounds_tuple); -tumbleEnd(time_attr, interval [, timezone]); +┌─hop(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ ('2024-07-03 00:00:00','2024-07-05 00:00:00') │ +└────────────────────────────────────────────────────┘ ``` ## hopStart -Returns the inclusive lower bound of the corresponding hopping window. +Returns the inclusive lower bound of the corresponding [hopping window](#hop). + +**Syntax** ``` sql -hopStart(bounds_tuple); hopStart(time_attr, hop_interval, window_interval [, timezone]); ``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The inclusive lower bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). 
+ +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. +::: + +**Example** + +Query: + +``` sql +SELECT hopStart(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopStart(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-03 00:00:00 │ +└─────────────────────────────────────────────────────────┘ +``` ## hopEnd -Returns the exclusive upper bound of the corresponding hopping window. +Returns the exclusive upper bound of the corresponding [hopping window](#hop). + +**Syntax** ``` sql -hopEnd(bounds_tuple); hopEnd(time_attr, hop_interval, window_interval [, timezone]); +``` +**Arguments** + +- `time_attr` — Date and time. [DateTime](../data-types/datetime.md). +- `hop_interval` — Positive Hop interval. [Interval](../data-types/special-data-types/interval.md). +- `window_interval` — Positive Window interval. [Interval](../data-types/special-data-types/interval.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +The parameters above can also be passed to the function as a [tuple](../data-types/tuple.md). + +**Returned values** + +- The exclusive upper bound of the corresponding hopping window. [DateTime](../data-types/datetime.md), [Tuple](../data-types/tuple.md) or [UInt32](../data-types/int-uint.md). + +:::note +Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. 
+::: + +**Example** + +Query: + +``` sql +SELECT hopEnd(now(), INTERVAL '1' DAY, INTERVAL '2' DAY); +``` + +Result: + +``` text +┌─hopEnd(now(), toIntervalDay('1'), toIntervalDay('2'))─┐ +│ 2024-07-05 00:00:00 │ +└───────────────────────────────────────────────────────┘ + ``` ## Related content diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 0663be08240..3b4d68e44b2 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -7,7 +7,7 @@ sidebar_label: Tuples ## tuple A function that allows grouping multiple columns. -For columns with the types T1, T2, ..., it returns a Tuple(T1, T2, ...) type tuple containing these columns. There is no cost to execute the function. +For columns C1, C2, ... with the types T1, T2, ..., it returns a named Tuple(C1 T1, C2 T2, ...) type tuple containing these columns if their names are unique and can be treated as unquoted identifiers, otherwise a Tuple(T1, T2, ...) is returned. There is no cost to execute the function. Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. The function implements the operator `(x, y, ...)`. @@ -259,6 +259,60 @@ Result: └───────────────────────────────────────┘ ``` +## tupleNames + +Converts a tuple into an array of column names. For a tuple in the form `Tuple(a T, b T, ...)`, it returns an array of strings representing the named columns of the tuple. If the tuple elements do not have explicit names, their indices will be used as the column names instead. + +**Syntax** + +``` sql +tupleNames(tuple) +``` + +**Arguments** + +- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. + +**Returned value** + +- An array with strings. 
+ +Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). + +**Example** + +Query: + +``` sql +CREATE TABLE tupletest (col Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory; + +INSERT INTO tupletest VALUES (tuple(1, 2)); + +SELECT tupleNames(col) FROM tupletest; +``` + +Result: + +``` text +┌─tupleNames(col)──────────┐ +│ ['user_ID','session_ID'] │ +└──────────────────────────┘ +``` + +If you pass a simple tuple to the function, ClickHouse uses the indexes of the columns as their names: + +``` sql +SELECT tupleNames(tuple(3, 2, 1)); +``` + +Result: + +``` text +┌─tupleNames((3, 2, 1))─┐ +│ ['1','2','3'] │ +└───────────────────────┘ +``` + ## tuplePlus Calculates the sum of corresponding values of two tuples of the same size. diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index ad40725d680..24b356eca87 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -600,7 +600,7 @@ mapApply(func, map) **Arguments** -- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `func` — [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). - `map` — [Map](../data-types/map.md). **Returned value** @@ -831,7 +831,39 @@ SELECT mapSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; └──────────────────────────────┘ ``` -For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. +For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function. 
+ +## mapPartialSort + +Sorts the elements of a map in ascending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. [(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). + +**Example** + +``` sql +SELECT mapPartialSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k2':1,'k3':2,'k1':3} │ +└───────────────────────────────────────────────────────────────────────────┘ +``` ## mapReverseSort(\[func,\], map) @@ -861,3 +893,35 @@ SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; ``` For more details see function [arrayReverseSort](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort). + +## mapPartialReverseSort + +Sorts the elements of a map in descending order with additional `limit` argument allowing partial sorting. +If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map. + +**Syntax** + +```sql +mapPartialReverseSort([func,] limit, map) +``` +**Arguments** + +- `func` – Optional function to apply to the keys and values of the map. [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). +- `limit` – Elements in range [1..limit] are sorted. 
[(U)Int](../data-types/int-uint.md). +- `map` – Map to sort. [Map](../data-types/map.md). + +**Returned value** + +- Partially sorted map. [Map](../data-types/map.md). + +**Example** + +``` sql +SELECT mapPartialReverseSort((k, v) -> v, 2, map('k1', 3, 'k2', 1, 'k3', 2)); +``` + +``` text +┌─mapPartialReverseSort(lambda(tuple(k, v), v), 2, map('k1', 3, 'k2', 1, 'k3', 2))─┐ +│ {'k1':3,'k3':2,'k2':1} │ +└──────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/programs/disks/CMakeLists.txt b/programs/disks/CMakeLists.txt index f0949fcfceb..7e8afe084fb 100644 --- a/programs/disks/CMakeLists.txt +++ b/programs/disks/CMakeLists.txt @@ -1,6 +1,8 @@ set (CLICKHOUSE_DISKS_SOURCES DisksApp.cpp + DisksClient.cpp ICommand.cpp + CommandChangeDirectory.cpp CommandCopy.cpp CommandLink.cpp CommandList.cpp @@ -9,10 +11,14 @@ set (CLICKHOUSE_DISKS_SOURCES CommandMove.cpp CommandRead.cpp CommandRemove.cpp - CommandWrite.cpp) + CommandSwitchDisk.cpp + CommandWrite.cpp + CommandHelp.cpp + CommandTouch.cpp + CommandGetCurrentDiskAndPath.cpp) if (CLICKHOUSE_CLOUD) - set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) + set (CLICKHOUSE_DISKS_SOURCES ${CLICKHOUSE_DISKS_SOURCES} CommandPackedIO.cpp) endif () set (CLICKHOUSE_DISKS_LINK diff --git a/programs/disks/CommandChangeDirectory.cpp b/programs/disks/CommandChangeDirectory.cpp new file mode 100644 index 00000000000..b545f37de72 --- /dev/null +++ b/programs/disks/CommandChangeDirectory.cpp @@ -0,0 +1,35 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandChangeDirectory final : public ICommand +{ +public: + explicit CommandChangeDirectory() : ICommand() + { + command_name = "cd"; + description = "Change directory (makes sense only in interactive mode)"; + options_description.add_options()("path", po::value(), "the path to which we want to change (mandatory, 
positional)")( + "disk", po::value(), "A disk where the path is changed (without disk switching)"); + positional_options_description.add("path", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + DiskWithPath & disk = getDiskWithPath(client, options, "disk"); + String path = getValueFromCommandLineOptionsThrow(options, "path"); + disk.setPath(path); + } +}; + +CommandPtr makeCommandChangeDirectory() +{ + return std::make_shared(); +} + +} diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index f176fa277d7..e3051f2702c 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -1,6 +1,8 @@ -#include "ICommand.h" #include +#include "Common/Exception.h" #include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { @@ -10,59 +12,89 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandCopy final : public ICommand { public: - CommandCopy() + explicit CommandCopy() : ICommand() { command_name = "copy"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Recursively copy data from `FROM_PATH` to `TO_PATH`"; - usage = "copy [OPTION]... 
"; - command_option_description->add_options() - ("disk-from", po::value(), "disk from which we copy") - ("disk-to", po::value(), "disk to which we copy"); + description = "Recursively copy data from `path-from` to `path-to`"; + options_description.add_options()( + "disk-from", po::value(), "disk from which we copy is executed (default value is a current disk)")( + "disk-to", po::value(), "disk to which copy is executed (default value is a current disk)")( + "path-from", po::value(), "path from which copy is executed (mandatory, positional)")( + "path-to", po::value(), "path to which copy is executed (mandatory, positional)")( + "recursive,r", "recursively copy the directory (required to remove a directory)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("disk-from")) - config.setString("disk-from", options["disk-from"].as()); - if (options.count("disk-to")) - config.setString("disk-to", options["disk-to"].as()); - } + auto disk_from = getDiskWithPath(client, options, "disk-from"); + auto disk_to = getDiskWithPath(client, options, "disk-to"); + String path_from = disk_from.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + bool recursive = options.count("recursive"); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) + if (!disk_from.getDisk()->exists(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat 
'{}' on disk '{}': No such file or directory", + path_from, + disk_from.getDisk()->getName()); } + else if (disk_from.getDisk()->isFile(path_from)) + { + auto target_location = getTargetLocation(path_from, disk_to, path_to); + if (!disk_to.getDisk()->exists(target_location) || disk_to.getDisk()->isFile(target_location)) + { + disk_from.getDisk()->copyFile( + path_from, + *disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite directory {} with non-directory {}", target_location, path_from); + } + } + else if (disk_from.getDisk()->isDirectory(path_from)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--recursive not specified; omitting directory {}", path_from); + } + auto target_location = getTargetLocation(path_from, disk_to, path_to); - String disk_name_from = config.getString("disk-from", config.getString("disk", "default")); - String disk_name_to = config.getString("disk-to", config.getString("disk", "default")); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk_from = disk_selector->get(disk_name_from); - DiskPtr disk_to = disk_selector->get(disk_name_to); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* read_settings= */ {}, /* write_settings= */ {}, /* cancellation_hook= */ {}); + if (disk_to.getDisk()->isFile(target_location)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory {} with directory {}", path_to, target_location); + } + else if (!disk_to.getDisk()->exists(target_location)) + { + disk_to.getDisk()->createDirectory(target_location); + } + disk_from.getDisk()->copyDirectoryContent( + path_from, + 
disk_to.getDisk(), + target_location, + /* read_settings= */ {}, + /* write_settings= */ {}, + /* cancellation_hook= */ {}); + } } }; + +CommandPtr makeCommandCopy() +{ + return std::make_shared(); } -std::unique_ptr makeCommandCopy() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandGetCurrentDiskAndPath.cpp b/programs/disks/CommandGetCurrentDiskAndPath.cpp new file mode 100644 index 00000000000..15f8ef5aae8 --- /dev/null +++ b/programs/disks/CommandGetCurrentDiskAndPath.cpp @@ -0,0 +1,30 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandGetCurrentDiskAndPath final : public ICommand +{ +public: + explicit CommandGetCurrentDiskAndPath() : ICommand() + { + command_name = "current_disk_with_path"; + description = "Prints current disk and path (which coincide with the prompt)"; + } + + void executeImpl(const CommandLineOptions &, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + std::cout << "Disk: " << disk.getDisk()->getName() << "\nPath: " << disk.getCurrentPath() << std::endl; + } +}; + +CommandPtr makeCommandGetCurrentDiskAndPath() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandHelp.cpp b/programs/disks/CommandHelp.cpp new file mode 100644 index 00000000000..a3aee9498d3 --- /dev/null +++ b/programs/disks/CommandHelp.cpp @@ -0,0 +1,43 @@ +#include "DisksApp.h" +#include "ICommand.h" + +#include +#include + +namespace DB +{ + +class CommandHelp final : public ICommand +{ +public: + explicit CommandHelp(const DisksApp & disks_app_) : disks_app(disks_app_) + { + command_name = "help"; + description = "Print help message about available commands"; + options_description.add_options()( + "command", po::value(), "A command to help with (optional, positional), if not specified, help lists all the commands"); + positional_options_description.add("command", 1); + } + + void executeImpl(const CommandLineOptions & 
options, DisksClient & /*client*/) override + { + std::optional command = getValueFromCommandLineOptionsWithOptional(options, "command"); + if (command.has_value()) + { + disks_app.printCommandHelpMessage(command.value()); + } + else + { + disks_app.printAvailableCommandsHelpMessage(); + } + } + + const DisksApp & disks_app; +}; + +CommandPtr makeCommandHelp(const DisksApp & disks_app) +{ + return std::make_shared(disks_app); +} + +} diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index dbaa3162f82..11c196cafc5 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -1,14 +1,9 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandLink final : public ICommand { public: @@ -16,42 +11,27 @@ public: { command_name = "link"; description = "Create hardlink from `from_path` to `to_path`"; - usage = "link [OPTION]... "; + options_description.add_options()( + "path-from", po::value(), "the path from which a hard link will be created (mandatory, positional)")( + "path-to", po::value(), "the path where a hard link will be created (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 2) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + const String & path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + const String & path_to = 
disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - disk->createHardLink(relative_path_from, relative_path_to); + disk.getDisk()->createHardLink(path_from, path_to); } }; + +CommandPtr makeCommandLink() +{ + return std::make_shared(); } -std::unique_ptr makeCommandLink() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index 7213802ea86..77479b1d217 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -1,98 +1,95 @@ -#include "ICommand.h" #include #include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandList final : public ICommand { public: - CommandList() + explicit CommandList() : ICommand() { command_name = "list"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); description = "List files at path[s]"; - usage = "list [OPTION]... 
..."; - command_option_description->add_options() - ("recursive", "recursively list all directories"); + options_description.add_options()("recursive", "recursively list the directory")("all", "show hidden files")( + "path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - bool recursive = config.getBool("recursive", false); + bool recursive = options.count("recursive"); + bool show_hidden = options.count("all"); + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsWithDefault(options, "path", "."); if (recursive) - listRecursive(disk, relative_path); + listRecursive(disk, path, show_hidden); else - list(disk, relative_path); + list(disk, path, show_hidden); } private: - static void list(const DiskPtr & disk, const std::string & relative_path) + static void list(const DiskWithPath & disk, const std::string & path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(path); + std::vector selected_and_sorted_file_names{}; for (const auto & file_name : file_names) - std::cout << file_name << '\n'; + if 
(show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); + + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) + { + std::cout << file_name << "\n"; + } } - static void listRecursive(const DiskPtr & disk, const std::string & relative_path) + static void listRecursive(const DiskWithPath & disk, const std::string & relative_path, bool show_hidden) { - std::vector file_names; - disk->listFiles(relative_path, file_names); + std::vector file_names = disk.listAllFilesByPath(relative_path); + std::vector selected_and_sorted_file_names{}; std::cout << relative_path << ":\n"; - if (!file_names.empty()) - { - for (const auto & file_name : file_names) - std::cout << file_name << '\n'; - std::cout << "\n"; - } - for (const auto & file_name : file_names) + if (show_hidden || (!file_name.starts_with('.'))) + selected_and_sorted_file_names.push_back(file_name); + + std::sort(selected_and_sorted_file_names.begin(), selected_and_sorted_file_names.end()); + for (const auto & file_name : selected_and_sorted_file_names) { - auto path = relative_path.empty() ? 
file_name : (relative_path + "/" + file_name); - if (disk->isDirectory(path)) - listRecursive(disk, path); + std::cout << file_name << "\n"; + } + std::cout << "\n"; + + for (const auto & file_name : selected_and_sorted_file_names) + { + auto path = [&]() -> String + { + if (relative_path.ends_with("/")) + { + return relative_path + file_name; + } + else + { + return relative_path + "/" + file_name; + } + }(); + if (disk.isDirectory(path)) + { + listRecursive(disk, path, show_hidden); + } } } }; -} -std::unique_ptr makeCommandList() +CommandPtr makeCommandList() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index 79da021fd00..9fb67fed5e0 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -1,68 +1,40 @@ -#include "ICommand.h" +#include #include +#include +#include "DisksClient.h" +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandListDisks final : public ICommand { public: - CommandListDisks() + explicit CommandListDisks() : ICommand() { command_name = "list-disks"; - description = "List disks names"; - usage = "list-disks [OPTION]"; + description = "Lists all available disks"; } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr &, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions &, DisksClient & client) override { - if (!command_arguments.empty()) + std::vector sorted_and_selected{}; + for (const auto & disk_name : client.getAllDiskNames()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + sorted_and_selected.push_back(disk_name + ":" + client.getDiskWithPath(disk_name).getAbsolutePath("")); } - - constexpr auto config_prefix = 
"storage_configuration.disks"; - constexpr auto default_disk_name = "default"; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_prefix, keys); - - bool has_default_disk = false; - - /// For the output to be ordered - std::set disks; - - for (const auto & disk_name : keys) + std::sort(sorted_and_selected.begin(), sorted_and_selected.end()); + for (const auto & disk_name : sorted_and_selected) { - if (disk_name == default_disk_name) - has_default_disk = true; - disks.insert(disk_name); + std::cout << disk_name << "\n"; } - - if (!has_default_disk) - disks.insert(default_disk_name); - - for (const auto & disk : disks) - std::cout << disk << '\n'; } -}; -} -std::unique_ptr makeCommandListDisks() +private: +}; + +CommandPtr makeCommandListDisks() { - return std::make_unique(); + return std::make_shared(); +} } diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index 6d33bdec498..c6222f326d4 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -6,61 +6,35 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandMkDir final : public ICommand { public: CommandMkDir() { command_name = "mkdir"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Create a directory"; - usage = "mkdir [OPTION]... 
"; - command_option_description->add_options() - ("recursive", "recursively create directories"); + description = "Creates a directory"; + options_description.add_options()("parents", "recursively create directories")( + "path", po::value(), "the path on which directory should be created (mandatory, positional)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("recursive")) - config.setBool("recursive", true); - } + bool recursive = options.count("parents"); + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) - { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - bool recursive = config.getBool("recursive", false); + String path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); if (recursive) - disk->createDirectories(relative_path); + disk.getDisk()->createDirectories(path); else - disk->createDirectory(relative_path); + disk.getDisk()->createDirectory(path); } }; + +CommandPtr makeCommandMkDir() +{ + return std::make_shared(); } -std::unique_ptr makeCommandMkDir() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 75cf96252ed..e3d485032e0 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -1,5 +1,5 @@ -#include "ICommand.h" #include +#include "ICommand.h" namespace DB { 
@@ -9,6 +9,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandMove final : public ICommand { public: @@ -16,44 +17,62 @@ public: { command_name = "move"; description = "Move file or directory from `from_path` to `to_path`"; - usage = "move [OPTION]... "; + options_description.add_options()("path-from", po::value(), "path from which we copy (mandatory, positional)")( + "path-to", po::value(), "path to which we copy (mandatory, positional)"); + positional_options_description.add("path-from", 1); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 2) + auto disk = client.getCurrentDiskWithPath(); + + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + + if (disk.getDisk()->isFile(path_from)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + disk.getDisk()->moveFile(path_from, path_to); + } + else if (disk.getDisk()->isDirectory(path_from)) + { + auto target_location = getTargetLocation(path_from, disk, path_to); + if (!disk.getDisk()->exists(target_location)) + { + disk.getDisk()->createDirectory(target_location); + disk.getDisk()->moveDirectory(path_from, target_location); + } + else + { + if (disk.getDisk()->isFile(target_location)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory '{}' with directory '{}'", target_location, path_from); + } + if (!disk.getDisk()->isDirectoryEmpty(target_location)) + { + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "cannot move '{}' to '{}': Directory not empty", path_from, target_location); + } + else + { + disk.getDisk()->moveDirectory(path_from, target_location); + } + } + } + else if (!disk.getDisk()->exists(path_from)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "cannot stat '{}' on disk: '{}': No such file or directory", + path_from, + disk.getDisk()->getName()); } - - String disk_name = config.getString("disk", "default"); - - const String & path_from = command_arguments[0]; - const String & path_to = command_arguments[1]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path_from = validatePathAndGetAsRelative(path_from); - String relative_path_to = validatePathAndGetAsRelative(path_to); - - if (disk->isFile(relative_path_from)) - disk->moveFile(relative_path_from, relative_path_to); - else - disk->moveDirectory(relative_path_from, relative_path_to); } }; + +CommandPtr makeCommandMove() +{ + return std::make_shared(); } -std::unique_ptr makeCommandMove() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 0f3ac7ab98c..277e735f507 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -1,78 +1,52 @@ -#include "ICommand.h" -#include #include #include #include +#include #include +#include "ICommand.h" namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandRead final : public ICommand { public: CommandRead() { command_name = "read"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Read a file from `FROM_PATH` to `TO_PATH`"; - usage = "read [OPTION]... 
[]"; - command_option_description->add_options() - ("output", po::value(), "file to which we are reading, defaults to `stdout`"); + description = "Read a file from `path-from` to `path-to`"; + options_description.add_options()("path-from", po::value(), "file from which we are reading (mandatory, positional)")( + "path-to", po::value(), "file to which we are writing, defaults to `stdout`"); + positional_options_description.add("path-from", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("output")) - config.setString("output", options["output"].as()); - } + auto disk = client.getCurrentDiskWithPath(); + String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-from")); + std::optional path_to = getValueFromCommandLineOptionsWithOptional(options, "path-to"); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) + auto in = disk.getDisk()->readFile(path_from); + std::unique_ptr out = {}; + if (path_to.has_value()) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - - String disk_name = config.getString("disk", "default"); - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(command_arguments[0]); - - String path_output = config.getString("output", ""); - - if (!path_output.empty()) - { - String relative_path_output = validatePathAndGetAsRelative(path_output); - - auto in = disk->readFile(relative_path); - auto out = disk->writeFile(relative_path_output); + String relative_path_to = disk.getRelativeFromRoot(path_to.value()); + out = disk.getDisk()->writeFile(relative_path_to); copyData(*in, *out); - out->finalize(); } else { - 
auto in = disk->readFile(relative_path); - std::unique_ptr out = std::make_unique(STDOUT_FILENO); + out = std::make_unique(STDOUT_FILENO); copyData(*in, *out); + out->write('\n'); } + out->finalize(); } }; + +CommandPtr makeCommandRead() +{ + return std::make_shared(); } -std::unique_ptr makeCommandRead() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index 0c631eacff3..4388f6c0b14 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -1,5 +1,6 @@ -#include "ICommand.h" #include +#include "Common/Exception.h" +#include "ICommand.h" namespace DB { @@ -9,46 +10,49 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + class CommandRemove final : public ICommand { public: CommandRemove() { command_name = "remove"; - description = "Remove file or directory with all children. Throws exception if file doesn't exists.\nPath should be in format './' or './path' or 'path'"; - usage = "remove [OPTION]... "; + description = "Remove file or directory. 
Throws exception if file doesn't exists"; + options_description.add_options()("path", po::value(), "path that is going to be deleted (mandatory, positional)")( + "recursive,r", "recursively removes the directory (required to remove a directory)"); + positional_options_description.add("path", 1); } - void processOptions( - Poco::Util::LayeredConfiguration &, - po::variables_map &) const override - {} - - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (command_arguments.size() != 1) + auto disk = client.getCurrentDiskWithPath(); + const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path")); + bool recursive = options.count("recursive"); + if (!disk.getDisk()->exists(path)) { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName()); + } + else if (disk.getDisk()->isDirectory(path)) + { + if (!recursive) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot remove '{}': Is a directory", path); + } + else + { + disk.getDisk()->removeRecursive(path); + } + } + else + { + disk.getDisk()->removeFileIfExists(path); } - - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - disk->removeRecursive(relative_path); } }; + +CommandPtr makeCommandRemove() +{ + return std::make_shared(); } -std::unique_ptr makeCommandRemove() -{ - return std::make_unique(); } diff --git a/programs/disks/CommandSwitchDisk.cpp b/programs/disks/CommandSwitchDisk.cpp new file mode 100644 index 00000000000..fa02d991365 --- /dev/null +++ 
b/programs/disks/CommandSwitchDisk.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include "DisksApp.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandSwitchDisk final : public ICommand +{ +public: + explicit CommandSwitchDisk() : ICommand() + { + command_name = "switch-disk"; + description = "Switch disk (makes sense only in interactive mode)"; + options_description.add_options()("disk", po::value(), "the disk to switch to (mandatory, positional)")( + "path", po::value(), "the path to switch on the disk"); + positional_options_description.add("disk", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + String disk = getValueFromCommandLineOptions(options, "disk"); + std::optional path = getValueFromCommandLineOptionsWithOptional(options, "path"); + + client.switchToDisk(disk, path); + } +}; + +CommandPtr makeCommandSwitchDisk() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandTouch.cpp b/programs/disks/CommandTouch.cpp new file mode 100644 index 00000000000..c0bdb64cf9e --- /dev/null +++ b/programs/disks/CommandTouch.cpp @@ -0,0 +1,34 @@ +#include +#include +#include "DisksApp.h" +#include "DisksClient.h" +#include "ICommand.h" + +namespace DB +{ + +class CommandTouch final : public ICommand +{ +public: + explicit CommandTouch() : ICommand() + { + command_name = "touch"; + description = "Create a file by path"; + options_description.add_options()("path", po::value(), "the path of listing (mandatory, positional)"); + positional_options_description.add("path", 1); + } + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override + { + auto disk = client.getCurrentDiskWithPath(); + String path = getValueFromCommandLineOptionsThrow(options, "path"); + + disk.getDisk()->createFile(disk.getRelativeFromRoot(path)); + } +}; + +CommandPtr makeCommandTouch() +{ + return std::make_shared(); +} +} diff --git a/programs/disks/CommandWrite.cpp 
b/programs/disks/CommandWrite.cpp index 7ded37e067a..9c82132e284 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -1,79 +1,57 @@ -#include "ICommand.h" #include +#include "ICommand.h" -#include #include #include #include +#include namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class CommandWrite final : public ICommand { public: CommandWrite() { command_name = "write"; - command_option_description.emplace(createOptionsDescription("Allowed options", getTerminalWidth())); - description = "Write a file from `FROM_PATH` to `TO_PATH`"; - usage = "write [OPTION]... [] "; - command_option_description->add_options() - ("input", po::value(), "file from which we are reading, defaults to `stdin`"); + description = "Write a file from `path-from` to `path-to`"; + options_description.add_options()("path-from", po::value(), "file from which we are reading, defaults to `stdin` (input from `stdin` is finished by Ctrl+D)")( + "path-to", po::value(), "file to which we are writing (mandatory, positional)"); + positional_options_description.add("path-to", 1); } - void processOptions( - Poco::Util::LayeredConfiguration & config, - po::variables_map & options) const override + + void executeImpl(const CommandLineOptions & options, DisksClient & client) override { - if (options.count("input")) - config.setString("input", options["input"].as()); - } + auto disk = client.getCurrentDiskWithPath(); - void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) override - { - if (command_arguments.size() != 1) + std::optional path_from = getValueFromCommandLineOptionsWithOptional(options, "path-from"); + + String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow(options, "path-to")); + + auto in = [&]() -> std::unique_ptr { - printHelpMessage(); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } + if 
(!path_from.has_value()) + { + return std::make_unique(STDIN_FILENO); + } + else + { + String relative_path_from = disk.getRelativeFromRoot(path_from.value()); + return disk.getDisk()->readFile(relative_path_from); + } + }(); - String disk_name = config.getString("disk", "default"); - - const String & path = command_arguments[0]; - - DiskPtr disk = disk_selector->get(disk_name); - - String relative_path = validatePathAndGetAsRelative(path); - - String path_input = config.getString("input", ""); - std::unique_ptr in; - if (path_input.empty()) - { - in = std::make_unique(STDIN_FILENO); - } - else - { - String relative_path_input = validatePathAndGetAsRelative(path_input); - in = disk->readFile(relative_path_input); - } - - auto out = disk->writeFile(relative_path); + auto out = disk.getDisk()->writeFile(path_to); copyData(*in, *out); out->finalize(); } }; + +CommandPtr makeCommandWrite() +{ + return std::make_shared(); } -std::unique_ptr makeCommandWrite() -{ - return std::make_unique(); } diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 5da5ab4bae9..59ba45b9451 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -1,11 +1,22 @@ #include "DisksApp.h" +#include +#include +#include "Common/Exception.h" +#include "Common/filesystemHelpers.h" +#include +#include "DisksClient.h" #include "ICommand.h" +#include "ICommand_fwd.h" + +#include +#include +#include +#include #include -#include #include - +#include namespace DB { @@ -13,74 +24,289 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +}; + +LineReader::Patterns DisksApp::query_extenders = {"\\"}; +LineReader::Patterns DisksApp::query_delimiters = {""}; +String DisksApp::word_break_characters = " \t\v\f\a\b\r\n"; + +CommandPtr DisksApp::getCommandByName(const String & command) const +{ + try + { + if (auto it = aliases.find(command); it != aliases.end()) + return command_descriptions.at(it->second); + + return 
command_descriptions.at(command); + } + catch (std::out_of_range &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The command `{}` is unknown", command); + } } -size_t DisksApp::findCommandPos(std::vector & common_arguments) +std::vector DisksApp::getEmptyCompletion(String command_name) const { - for (size_t i = 0; i < common_arguments.size(); i++) - if (supported_commands.contains(common_arguments[i])) - return i + 1; - return common_arguments.size(); + auto command_ptr = command_descriptions.at(command_name); + std::vector answer{}; + if (multidisk_commands.contains(command_ptr->command_name)) + { + answer = client->getAllFilesByPatternFromAllDisks(""); + } + else + { + answer = client->getCurrentDiskWithPath().getAllFilesByPattern(""); + } + for (const auto & disk_name : client->getAllDiskNames()) + { + answer.push_back(disk_name); + } + for (const auto & option : command_ptr->options_description.options()) + { + answer.push_back("--" + option->long_name()); + } + if (command_name == "help") + { + for (const auto & [current_command_name, description] : command_descriptions) + { + answer.push_back(current_command_name); + } + } + std::sort(answer.begin(), answer.end()); + return answer; } -void DisksApp::printHelpMessage(ProgramOptionsDescription & command_option_description) +std::vector DisksApp::getCommandsToComplete(const String & command_prefix) const { - std::optional help_description = - createOptionsDescription("Help Message for clickhouse-disks", getTerminalWidth()); - - help_description->add(command_option_description); - - std::cout << "ClickHouse disk management tool\n"; - std::cout << "Usage: ./clickhouse-disks [OPTION]\n"; - std::cout << "clickhouse-disks\n\n"; - - for (const auto & current_command : supported_commands) - std::cout << command_descriptions[current_command]->command_name - << "\t" - << command_descriptions[current_command]->description - << "\n\n"; - - std::cout << command_option_description << '\n'; + std::vector answer{}; + for 
(const auto & [word, _] : command_descriptions) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + for (const auto & [word, _] : aliases) + { + if (word.starts_with(command_prefix)) + { + answer.push_back(word); + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + return {command_prefix}; } -String DisksApp::getDefaultConfigFileName() +std::vector DisksApp::getCompletions(const String & prefix) const { - return "/etc/clickhouse-server/config.xml"; + auto arguments = po::split_unix(prefix, word_break_characters); + if (arguments.empty()) + { + return {}; + } + if (word_break_characters.contains(prefix.back())) + { + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) + { + return {arguments.back()}; + } + return getEmptyCompletion(command->command_name); + } + else if (arguments.size() == 1) + { + String command_prefix = arguments[0]; + return getCommandsToComplete(command_prefix); + } + else + { + String last_token = arguments.back(); + CommandPtr command; + try + { + command = getCommandByName(arguments[0]); + } + catch (...) 
+ { + return {last_token}; + } + std::vector answer = {}; + if (command->command_name == "help") + { + return getCommandsToComplete(last_token); + } + else + { + answer = [&]() -> std::vector + { + if (multidisk_commands.contains(command->command_name)) + { + return client->getAllFilesByPatternFromAllDisks(last_token); + } + else + { + return client->getCurrentDiskWithPath().getAllFilesByPattern(last_token); + } + }(); + + for (const auto & disk_name : client->getAllDiskNames()) + { + if (disk_name.starts_with(last_token)) + { + answer.push_back(disk_name); + } + } + for (const auto & option : command->options_description.options()) + { + String option_sign = "--" + option->long_name(); + if (option_sign.starts_with(last_token)) + { + answer.push_back(option_sign); + } + } + } + if (!answer.empty()) + { + std::sort(answer.begin(), answer.end()); + return answer; + } + else + { + return {last_token}; + } + } } -void DisksApp::addOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description -) +bool DisksApp::processQueryText(const String & text) { - options_description_.add_options() - ("help,h", "Print common help message") - ("config-file,C", po::value(), "Set config file") - ("disk", po::value(), "Set disk name") - ("command_name", po::value(), "Name for command to do") - ("save-logs", "Save logs to a file") - ("log-level", po::value(), "Logging level") - ; + if (text.find_first_not_of(word_break_characters) == std::string::npos) + { + return true; + } + if (exit_strings.find(text) != exit_strings.end()) + return false; + CommandPtr command; + try + { + auto arguments = po::split_unix(text, word_break_characters); + command = getCommandByName(arguments[0]); + arguments.erase(arguments.begin()); + command->execute(arguments, *client); + } + catch (DB::Exception & err) + { + int code = getCurrentExceptionCode(); + if (code == ErrorCodes::LOGICAL_ERROR) + { + throw 
std::move(err); + } + else if (code == ErrorCodes::BAD_ARGUMENTS) + { + std::cerr << err.message() << "\n" + << "\n"; + if (command.get()) + { + std::cerr << "COMMAND: " << command->command_name << "\n"; + std::cerr << command->options_description << "\n"; + } + else + { + printAvailableCommandsHelpMessage(); + } + } + else + { + std::cerr << err.message() << "\n"; + } + } + catch (std::exception & err) + { + std::cerr << err.what() << "\n"; + } - positional_options_description.add("command_name", 1); + return true; +} - supported_commands = {"list-disks", "list", "move", "remove", "link", "copy", "write", "read", "mkdir"}; -#ifdef CLICKHOUSE_CLOUD - supported_commands.insert("packed-io"); -#endif +void DisksApp::runInteractiveReplxx() +{ + ReplxxLineReader lr( + suggest, + history_file, + /* multiline= */ false, + query_extenders, + query_delimiters, + word_break_characters.c_str(), + /* highlighter_= */ {}); + lr.enableBracketedPaste(); + + while (true) + { + DiskWithPath disk_with_path = client->getCurrentDiskWithPath(); + String prompt = "\x1b[1;34m" + disk_with_path.getDisk()->getName() + "\x1b[0m:" + "\x1b[1;31m" + disk_with_path.getCurrentPath() + + "\x1b[0m$ "; + + auto input = lr.readLine(prompt, "\x1b[1;31m:-] \x1b[0m"); + if (input.empty()) + break; + + if (!processQueryText(input)) + break; + } +} + +void DisksApp::parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options) +{ + auto parser = po::command_line_parser(arguments).options(options_description).allow_unregistered(); + po::parsed_options parsed = parser.run(); + po::store(parsed, options); +} + +void DisksApp::addOptions() +{ + options_description.add_options()("help,h", "Print common help message")("config-file,C", po::value(), "Set config file")( + "disk", po::value(), "Set disk name")("save-logs", "Save logs to a file")( + "log-level", po::value(), "Logging level")("query,q", po::value(), "Query for a 
non-interactive mode")( + "test-mode", "Interactive interface in test regyme"); command_descriptions.emplace("list-disks", makeCommandListDisks()); + command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("list", makeCommandList()); + command_descriptions.emplace("cd", makeCommandChangeDirectory()); command_descriptions.emplace("move", makeCommandMove()); command_descriptions.emplace("remove", makeCommandRemove()); command_descriptions.emplace("link", makeCommandLink()); - command_descriptions.emplace("copy", makeCommandCopy()); command_descriptions.emplace("write", makeCommandWrite()); command_descriptions.emplace("read", makeCommandRead()); command_descriptions.emplace("mkdir", makeCommandMkDir()); + command_descriptions.emplace("switch-disk", makeCommandSwitchDisk()); + command_descriptions.emplace("current_disk_with_path", makeCommandGetCurrentDiskAndPath()); + command_descriptions.emplace("touch", makeCommandTouch()); + command_descriptions.emplace("help", makeCommandHelp(*this)); #ifdef CLICKHOUSE_CLOUD command_descriptions.emplace("packed-io", makeCommandPackedIO()); #endif + for (const auto & [command_name, command_ptr] : command_descriptions) + { + if (command_name != command_ptr->command_name) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Command name inside map doesn't coincide with actual command name"); + } + } } void DisksApp::processOptions() @@ -93,76 +319,122 @@ void DisksApp::processOptions() config().setBool("save-logs", true); if (options.count("log-level")) config().setString("log-level", options["log-level"].as()); + if (options.count("test-mode")) + config().setBool("test-mode", true); + if (options.count("query")) + query = std::optional{options["query"].as()}; } -DisksApp::~DisksApp() + +void DisksApp::printEntryHelpMessage() const { - if (global_context) - global_context->shutdown(); + std::cout << "\x1b[1;33m ClickHouse disk management tool \x1b[0m \n"; + std::cout << options_description << '\n'; } -void 
DisksApp::init(std::vector & common_arguments) + +void DisksApp::printAvailableCommandsHelpMessage() const { - stopOptionsProcessing(); + std::cout << "\x1b[1;32mAvailable commands:\x1b[0m\n"; + std::vector> commands_with_aliases_and_descrtiptions{}; + size_t maximal_command_length = 0; + for (const auto & [command_name, command_ptr] : command_descriptions) + { + std::string command_string = getCommandLineWithAliases(command_ptr); + maximal_command_length = std::max(maximal_command_length, command_string.size()); + commands_with_aliases_and_descrtiptions.push_back({std::move(command_string), command_descriptions.at(command_name)}); + } + for (const auto & [command_with_aliases, command_ptr] : commands_with_aliases_and_descrtiptions) + { + std::cout << "\x1b[1;33m" << command_with_aliases << "\x1b[0m" << std::string(5, ' ') << "\x1b[1;33m" << command_ptr->description + << "\x1b[0m \n"; + std::cout << command_ptr->options_description; + std::cout << std::endl; + } +} - ProgramOptionsDescription options_description{createOptionsDescription("clickhouse-disks", getTerminalWidth())}; +void DisksApp::printCommandHelpMessage(CommandPtr command) const +{ + String command_name_with_aliases = getCommandLineWithAliases(command); + std::cout << "\x1b[1;32m" << command_name_with_aliases << "\x1b[0m" << std::string(2, ' ') << command->description << "\n"; + std::cout << command->options_description; +} - po::positional_options_description positional_options_description; +void DisksApp::printCommandHelpMessage(String command_name) const +{ + printCommandHelpMessage(getCommandByName(command_name)); +} - addOptions(options_description, positional_options_description); +String DisksApp::getCommandLineWithAliases(CommandPtr command) const +{ + String command_string = command->command_name; + bool need_comma = false; + for (const auto & [alias_name, alias_command_name] : aliases) + { + if (alias_command_name == command->command_name) + { + if (std::exchange(need_comma, true)) + 
command_string += ","; + else + command_string += "("; + command_string += alias_name; + } + } + command_string += (need_comma ? ")" : ""); + return command_string; +} - size_t command_pos = findCommandPos(common_arguments); - std::vector global_flags(command_pos); - command_arguments.resize(common_arguments.size() - command_pos); - copy(common_arguments.begin(), common_arguments.begin() + command_pos, global_flags.begin()); - copy(common_arguments.begin() + command_pos, common_arguments.end(), command_arguments.begin()); +void DisksApp::initializeHistoryFile() +{ + String home_path; + const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + if (home_path_cstr) + home_path = home_path_cstr; + if (config().has("history-file")) + history_file = config().getString("history-file"); + else + history_file = home_path + "/.disks-file-history"; - parseAndCheckOptions(options_description, positional_options_description, global_flags); + if (!history_file.empty() && !fs::exists(history_file)) + { + try + { + FS::createFile(history_file); + } + catch (const ErrnoException & e) + { + if (e.getErrno() != EEXIST) + throw; + } + } +} + +void DisksApp::init(const std::vector & common_arguments) +{ + addOptions(); + parseAndCheckOptions(common_arguments, options_description, options); po::notify(options); if (options.count("help")) { - printHelpMessage(options_description); + printEntryHelpMessage(); + printAvailableCommandsHelpMessage(); exit(0); // NOLINT(concurrency-mt-unsafe) } - if (!supported_commands.contains(command_name)) - { - std::cerr << "Unknown command name: " << command_name << "\n"; - printHelpMessage(options_description); - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); - } - processOptions(); } -void DisksApp::parseAndCheckOptions( - ProgramOptionsDescription & options_description_, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments) +String 
DisksApp::getDefaultConfigFileName() { - auto parser = po::command_line_parser(arguments) - .options(options_description_) - .positional(positional_options_description) - .allow_unregistered(); - - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - - auto positional_arguments = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - for (const auto & arg : positional_arguments) - { - if (command_descriptions.contains(arg)) - { - command_name = arg; - break; - } - } + return "/etc/clickhouse-server/config.xml"; } int DisksApp::main(const std::vector & /*args*/) { + std::vector keys; + config().keys(keys); if (config().has("config-file") || fs::exists(getDefaultConfigFileName())) { String config_path = config().getString("config-file", getDefaultConfigFileName()); @@ -173,9 +445,13 @@ int DisksApp::main(const std::vector & /*args*/) } else { + printEntryHelpMessage(); throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified"); } + config().keys(keys); + initializeHistoryFile(); + if (config().has("save-logs")) { auto log_level = config().getString("log-level", "trace"); @@ -200,61 +476,68 @@ int DisksApp::main(const std::vector & /*args*/) global_context->setApplicationType(Context::ApplicationType::DISKS); String path = config().getString("path", DBMS_DEFAULT_PATH); + global_context->setPath(path); - auto & command = command_descriptions[command_name]; + String main_disk = config().getString("disk", "default"); - auto command_options = command->getCommandOptions(); - std::vector args; - if (command_options) + auto validator = [](const Poco::Util::AbstractConfiguration &, const std::string &, const std::string &) { return true; }; + + constexpr auto config_prefix = "storage_configuration.disks"; + auto disk_selector = std::make_shared(std::unordered_set{"cache", "encrypted"}); + disk_selector->initialize(config(), config_prefix, global_context, validator); + + std::vector>> disks_with_path; + + 
for (const auto & [_, disk_ptr] : disk_selector->getDisksMap()) { - auto parser = po::command_line_parser(command_arguments).options(*command_options).allow_unregistered(); - po::parsed_options parsed = parser.run(); - po::store(parsed, options); - po::notify(options); + disks_with_path.emplace_back( + disk_ptr, (disk_ptr->getName() == "local") ? std::optional{fs::current_path().string()} : std::nullopt); + } - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - command->processOptions(config(), options); + + client = std::make_unique(std::move(disks_with_path), main_disk); + + suggest.setCompletionsCallback([&](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); }); + + if (!query.has_value()) + { + runInteractive(); } else { - auto parser = po::command_line_parser(command_arguments).options({}).allow_unregistered(); - po::parsed_options parsed = parser.run(); - args = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + processQueryText(query.value()); } - std::unordered_set disks - { - config().getString("disk", "default"), - config().getString("disk-from", config().getString("disk", "default")), - config().getString("disk-to", config().getString("disk", "default")), - }; - - auto validator = [&disks]( - const Poco::Util::AbstractConfiguration & config, - const std::string & disk_config_prefix, - const std::string & disk_name) - { - if (!disks.contains(disk_name)) - return false; - - const auto disk_type = config.getString(disk_config_prefix + ".type", "local"); - - if (disk_type == "cache") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk type 'cache' of disk {} is not supported by clickhouse-disks", disk_name); - - return true; - }; - - constexpr auto config_prefix = "storage_configuration.disks"; - auto disk_selector = std::make_shared(); - disk_selector->initialize(config(), config_prefix, global_context, validator); - - 
command->execute(args, disk_selector, config()); - return Application::EXIT_OK; } +DisksApp::~DisksApp() +{ + client.reset(nullptr); + if (global_context) + global_context->shutdown(); +} + +void DisksApp::runInteractiveTestMode() +{ + for (String input; std::getline(std::cin, input);) + { + if (!processQueryText(input)) + break; + + std::cout << "\a\a\a\a" << std::endl; + std::cerr << std::flush; + } +} + +void DisksApp::runInteractive() +{ + if (config().hasOption("test-mode")) + runInteractiveTestMode(); + else + runInteractiveReplxx(); +} } int mainEntryClickHouseDisks(int argc, char ** argv) @@ -269,16 +552,16 @@ int mainEntryClickHouseDisks(int argc, char ** argv) catch (const DB::Exception & e) { std::cerr << DB::getExceptionMessage(e, false) << std::endl; - return 1; + return 0; } catch (const boost::program_options::error & e) { std::cerr << "Bad arguments: " << e.what() << std::endl; - return DB::ErrorCodes::BAD_ARGUMENTS; + return 0; } catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; - return 1; + return 0; } } diff --git a/programs/disks/DisksApp.h b/programs/disks/DisksApp.h index 51bc3f58dc4..f8167884c62 100644 --- a/programs/disks/DisksApp.h +++ b/programs/disks/DisksApp.h @@ -1,61 +1,107 @@ #pragma once +#include +#include +#include #include +#include "DisksClient.h" +#include "ICommand_fwd.h" #include +#include +#include #include -#include - namespace DB { -class ICommand; -using CommandPtr = std::unique_ptr; - -namespace po = boost::program_options; using ProgramOptionsDescription = boost::program_options::options_description; using CommandLineOptions = boost::program_options::variables_map; -class DisksApp : public Poco::Util::Application, public Loggers +class DisksApp : public Poco::Util::Application { public: - DisksApp() = default; - ~DisksApp() override; + void addOptions(); - void init(std::vector & common_arguments); - - int main(const std::vector & args) override; - -protected: - static String 
getDefaultConfigFileName(); - - void addOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description); void processOptions(); - void printHelpMessage(ProgramOptionsDescription & command_option_description); + bool processQueryText(const String & text); - size_t findCommandPos(std::vector & common_arguments); + void init(const std::vector & common_arguments); + + int main(const std::vector & /*args*/) override; + + CommandPtr getCommandByName(const String & command) const; + + void initializeHistoryFile(); + + static void parseAndCheckOptions( + const std::vector & arguments, const ProgramOptionsDescription & options_description, CommandLineOptions & options); + + void printEntryHelpMessage() const; + void printAvailableCommandsHelpMessage() const; + void printCommandHelpMessage(String command_name) const; + void printCommandHelpMessage(CommandPtr command) const; + String getCommandLineWithAliases(CommandPtr command) const; + + + std::vector getCompletions(const String & prefix) const; + + std::vector getEmptyCompletion(String command_name) const; + + ~DisksApp() override; private: - void parseAndCheckOptions( - ProgramOptionsDescription & options_description, - boost::program_options::positional_options_description & positional_options_description, - std::vector & arguments); + void runInteractive(); + void runInteractiveReplxx(); + void runInteractiveTestMode(); + + String getDefaultConfigFileName(); + + std::vector getCommandsToComplete(const String & command_prefix) const; + + // Fields responsible for the REPL work + String history_file; + LineReader::Suggest suggest; + static LineReader::Patterns query_extenders; + static LineReader::Patterns query_delimiters; + static String word_break_characters; + + // General command line arguments parsing fields -protected: - ContextMutablePtr global_context; SharedContextHolder shared_context; - - String command_name; - std::vector 
command_arguments; - - std::unordered_set supported_commands; + ContextMutablePtr global_context; + ProgramOptionsDescription options_description; + CommandLineOptions options; std::unordered_map command_descriptions; - po::variables_map options; -}; + std::optional query; + const std::unordered_map aliases + = {{"cp", "copy"}, + {"mv", "move"}, + {"ls", "list"}, + {"list_disks", "list-disks"}, + {"ln", "link"}, + {"rm", "remove"}, + {"cat", "read"}, + {"r", "read"}, + {"w", "write"}, + {"create", "touch"}, + {"delete", "remove"}, + {"ls-disks", "list-disks"}, + {"ls_disks", "list-disks"}, + {"packed_io", "packed-io"}, + {"change-dir", "cd"}, + {"change_dir", "cd"}, + {"switch_disk", "switch-disk"}, + {"current", "current_disk_with_path"}, + {"current_disk", "current_disk_with_path"}, + {"current_path", "current_disk_with_path"}, + {"cur", "current_disk_with_path"}}; + + std::set multidisk_commands = {"copy", "packed-io", "switch-disk", "cd"}; + + std::unique_ptr client{}; +}; } diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp new file mode 100644 index 00000000000..7e36c7911ab --- /dev/null +++ b/programs/disks/DisksClient.cpp @@ -0,0 +1,263 @@ +#include "DisksClient.h" +#include +#include +#include +#include + +#include + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +}; + +namespace DB +{ +DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional path_) : disk(disk_) +{ + if (path_.has_value()) + { + if (!fs::path{path_.value()}.is_absolute()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing path {} is not absolute", path_.value()); + } + path = path_.value(); + } + else + { + path = String{"/"}; + } + + String relative_path = normalizePathAndGetAsRelative(path); + if (disk->isDirectory(relative_path) || (relative_path.empty() && (disk->isDirectory("/")))) + { + return; + } + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Initializing path {} (normalized path: {}) at disk {} 
is not a directory", + path, + relative_path, + disk->getName()); +} + +std::vector DiskWithPath::listAllFilesByPath(const String & any_path) const +{ + if (isDirectory(any_path)) + { + std::vector file_names; + disk->listFiles(getRelativeFromRoot(any_path), file_names); + return file_names; + } + else + { + return {}; + } +} + +std::vector DiskWithPath::getAllFilesByPattern(const String & pattern) const +{ + auto [path_before, path_after] = [&]() -> std::pair + { + auto slash_pos = pattern.find_last_of('/'); + if (slash_pos >= pattern.size()) + { + return {"", pattern}; + } + else + { + return {pattern.substr(0, slash_pos + 1), pattern.substr(slash_pos + 1, pattern.size() - slash_pos - 1)}; + } + }(); + + if (!isDirectory(path_before)) + { + return {}; + } + else + { + std::vector file_names = listAllFilesByPath(path_before); + + std::vector answer; + + for (const auto & file_name : file_names) + { + if (file_name.starts_with(path_after)) + { + String file_pattern = path_before + file_name; + if (isDirectory(file_pattern)) + { + file_pattern = file_pattern + "/"; + } + answer.push_back(file_pattern); + } + } + return answer; + } +}; + +void DiskWithPath::setPath(const String & any_path) +{ + if (isDirectory(any_path)) + { + path = getAbsolutePath(any_path); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} at disk {} is not a directory", any_path, disk->getName()); + } +} + +String DiskWithPath::validatePathAndGetAsRelative(const String & path) +{ + String lexically_normal_path = fs::path(path).lexically_normal(); + if (lexically_normal_path.find("..") != std::string::npos) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); + + /// If path is absolute we should keep it as relative inside disk, so disk will look like + /// an ordinary filesystem with root. 
+ if (fs::path(lexically_normal_path).is_absolute()) + return lexically_normal_path.substr(1); + + return lexically_normal_path; +} + +String DiskWithPath::normalizePathAndGetAsRelative(const String & messyPath) +{ + std::filesystem::path path(messyPath); + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + String npath = canonical_path.make_preferred().string(); + return validatePathAndGetAsRelative(npath); +} + +String DiskWithPath::normalizePath(const String & path) +{ + std::filesystem::path canonical_path = std::filesystem::weakly_canonical(path); + return canonical_path.make_preferred().string(); +} + +DisksClient::DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk) +{ + if (disks_with_paths.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Initializing array of disks is empty"); + } + if (!begin_disk.has_value()) + { + begin_disk = disks_with_paths[0].first->getName(); + } + bool has_begin_disk = false; + for (auto & [disk, path] : disks_with_paths) + { + addDisk(disk, path); + if (disk->getName() == begin_disk.value()) + { + has_begin_disk = true; + } + } + if (!has_begin_disk) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no begin_disk '{}' in initializing array", begin_disk.value()); + } + current_disk = std::move(begin_disk.value()); +} + +const DiskWithPath & DisksClient::getDiskWithPath(const String & disk) const +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +DiskWithPath & DisksClient::getDiskWithPath(const String & disk) +{ + try + { + return disks.at(disk); + } + catch (...) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk); + } +} + +const DiskWithPath & DisksClient::getCurrentDiskWithPath() const +{ + try + { + return disks.at(current_disk); + } + catch (...) 
+ { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +DiskWithPath & DisksClient::getCurrentDiskWithPath() +{ + try + { + return disks.at(current_disk); + } + catch (...) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no current disk in client"); + } +} + +void DisksClient::switchToDisk(const String & disk_, const std::optional & path_) +{ + if (disks.contains(disk_)) + { + if (path_.has_value()) + { + disks.at(disk_).setPath(path_.value()); + } + current_disk = disk_; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' is unknown", disk_); + } +} + +std::vector DisksClient::getAllDiskNames() const +{ + std::vector answer{}; + answer.reserve(disks.size()); + for (const auto & [disk_name, _] : disks) + { + answer.push_back(disk_name); + } + return answer; +} + +std::vector DisksClient::getAllFilesByPatternFromAllDisks(const String & pattern) const +{ + std::vector answer{}; + for (const auto & [_, disk] : disks) + { + for (auto & word : disk.getAllFilesByPattern(pattern)) + { + answer.push_back(word); + } + } + return answer; +} + +void DisksClient::addDisk(DiskPtr disk_, const std::optional & path_) +{ + String disk_name = disk_->getName(); + if (disks.contains(disk_->getName())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The disk '{}' already exists", disk_name); + } + disks.emplace(disk_name, DiskWithPath{disk_, path_}); +} +} diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h new file mode 100644 index 00000000000..8a55d22af93 --- /dev/null +++ b/programs/disks/DisksClient.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "Disks/IDisk.h" + +#include +#include +#include + +namespace fs = std::filesystem; + +namespace DB +{ + +std::vector split(const String & text, const String & delimiters); + +using ProgramOptionsDescription = boost::program_options::options_description; +using 
CommandLineOptions = boost::program_options::variables_map; + +class DiskWithPath +{ +public: + explicit DiskWithPath(DiskPtr disk_, std::optional path_ = std::nullopt); + + String getAbsolutePath(const String & any_path) const { return normalizePath(fs::path(path) / any_path); } + + String getCurrentPath() const { return path; } + + bool isDirectory(const String & any_path) const + { + return disk->isDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->isDirectory("/"))); + } + + std::vector listAllFilesByPath(const String & any_path) const; + + std::vector getAllFilesByPattern(const String & pattern) const; + + DiskPtr getDisk() const { return disk; } + + void setPath(const String & any_path); + + String getRelativeFromRoot(const String & any_path) const { return normalizePathAndGetAsRelative(getAbsolutePath(any_path)); } + +private: + static String validatePathAndGetAsRelative(const String & path); + static std::string normalizePathAndGetAsRelative(const std::string & messyPath); + static std::string normalizePath(const std::string & messyPath); + + const DiskPtr disk; + String path; +}; + +class DisksClient +{ +public: + explicit DisksClient(std::vector>> && disks_with_paths, std::optional begin_disk); + + const DiskWithPath & getDiskWithPath(const String & disk) const; + + DiskWithPath & getDiskWithPath(const String & disk); + + const DiskWithPath & getCurrentDiskWithPath() const; + + DiskWithPath & getCurrentDiskWithPath(); + + DiskPtr getCurrentDisk() const { return getCurrentDiskWithPath().getDisk(); } + + DiskPtr getDisk(const String & disk) const { return getDiskWithPath(disk).getDisk(); } + + void switchToDisk(const String & disk_, const std::optional & path_); + + std::vector getAllDiskNames() const; + + std::vector getAllFilesByPatternFromAllDisks(const String & pattern) const; + + +private: + void addDisk(DiskPtr disk_, const std::optional & path_); + + String current_disk; + std::unordered_map disks; +}; +} 
diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 86188fb6db1..f622bcad3c6 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -1,5 +1,5 @@ #include "ICommand.h" -#include +#include "DisksClient.h" namespace DB @@ -10,43 +10,42 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -void ICommand::printHelpMessage() const +CommandLineOptions ICommand::processCommandLineArguments(const Strings & commands) { - std::cout << "Command: " << command_name << '\n'; - std::cout << "Description: " << description << '\n'; - std::cout << "Usage: " << usage << '\n'; + CommandLineOptions options; + auto parser = po::command_line_parser(commands); + parser.options(options_description).positional(positional_options_description); - if (command_option_description) + po::parsed_options parsed = parser.run(); + po::store(parsed, options); + + return options; +} + +void ICommand::execute(const Strings & commands, DisksClient & client) +{ + try { - auto options = *command_option_description; - if (!options.options().empty()) - std::cout << options << '\n'; + processCommandLineArguments(commands); + } + catch (std::exception & exc) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}", exc.what()); + } + executeImpl(processCommandLineArguments(commands), client); +} + +DiskWithPath & ICommand::getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name) +{ + auto disk_name = getValueFromCommandLineOptionsWithOptional(options, name); + if (disk_name.has_value()) + { + return client.getDiskWithPath(disk_name.value()); + } + else + { + return client.getCurrentDiskWithPath(); } } -void ICommand::addOptions(ProgramOptionsDescription & options_description) -{ - if (!command_option_description || command_option_description->options().empty()) - return; - - options_description.add(*command_option_description); -} - -String ICommand::validatePathAndGetAsRelative(const String & path) -{ - /// If path contain 
non-normalized symbols like . we will normalized them. If the resulting normalized path - /// still contain '..' it can be dangerous, disallow such paths. Also since clickhouse-disks - /// is not an interactive program (don't track you current path) it's OK to disallow .. paths. - String lexically_normal_path = fs::path(path).lexically_normal(); - if (lexically_normal_path.find("..") != std::string::npos) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); - - /// If path is absolute we should keep it as relative inside disk, so disk will look like - /// an ordinary filesystem with root. - if (fs::path(lexically_normal_path).is_absolute()) - return lexically_normal_path.substr(1); - - return lexically_normal_path; -} - } diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h index efe350fe87b..6faf90e2b52 100644 --- a/programs/disks/ICommand.h +++ b/programs/disks/ICommand.h @@ -1,66 +1,146 @@ #pragma once -#include +#include #include +#include +#include #include -#include #include +#include "Common/Exception.h" +#include -#include +#include + +#include "DisksApp.h" + +#include "DisksClient.h" + +#include "ICommand_fwd.h" namespace DB { namespace po = boost::program_options; -using ProgramOptionsDescription = boost::program_options::options_description; -using CommandLineOptions = boost::program_options::variables_map; +using ProgramOptionsDescription = po::options_description; +using PositionalProgramOptionsDescription = po::positional_options_description; +using CommandLineOptions = po::variables_map; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} class ICommand { public: - ICommand() = default; + explicit ICommand() = default; virtual ~ICommand() = default; - virtual void execute( - const std::vector & command_arguments, - std::shared_ptr & disk_selector, - Poco::Util::LayeredConfiguration & config) = 0; + void execute(const Strings & commands, DisksClient & client); - const std::optional & 
getCommandOptions() const { return command_option_description; } + virtual void executeImpl(const CommandLineOptions & options, DisksClient & client) = 0; - void addOptions(ProgramOptionsDescription & options_description); - - virtual void processOptions(Poco::Util::LayeredConfiguration & config, po::variables_map & options) const = 0; + CommandLineOptions processCommandLineArguments(const Strings & commands); protected: - void printHelpMessage() const; + template + static T getValueFromCommandLineOptions(const CommandLineOptions & options, const String & name) + { + try + { + return options[name].as(); + } + catch (boost::bad_any_cast &) + { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Argument '{}' has wrong type and can't be parsed", name); + } + } + + template + static T getValueFromCommandLineOptionsThrow(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Mandatory argument '{}' is missing", name); + } + } + + template + static T getValueFromCommandLineOptionsWithDefault(const CommandLineOptions & options, const String & name, const T & default_value) + { + if (options.count(name)) + { + return getValueFromCommandLineOptions(options, name); + } + else + { + return default_value; + } + } + + template + static std::optional getValueFromCommandLineOptionsWithOptional(const CommandLineOptions & options, const String & name) + { + if (options.count(name)) + { + return std::optional{getValueFromCommandLineOptions(options, name)}; + } + else + { + return std::nullopt; + } + } + + DiskWithPath & getDiskWithPath(DisksClient & client, const CommandLineOptions & options, const String & name); + + String getTargetLocation(const String & path_from, DiskWithPath & disk_to, const String & path_to) + { + if (!disk_to.getDisk()->isDirectory(path_to)) + { + return path_to; + } + String copied_path_from = 
path_from; + if (copied_path_from.ends_with('/')) + { + copied_path_from.pop_back(); + } + String plain_filename = fs::path(copied_path_from).filename(); + + return fs::path{path_to} / plain_filename; + } - static String validatePathAndGetAsRelative(const String & path); public: String command_name; String description; + ProgramOptionsDescription options_description; protected: - std::optional command_option_description; - String usage; - po::positional_options_description positional_options_description; + PositionalProgramOptionsDescription positional_options_description; }; -using CommandPtr = std::unique_ptr; - -} - DB::CommandPtr makeCommandCopy(); -DB::CommandPtr makeCommandLink(); -DB::CommandPtr makeCommandList(); DB::CommandPtr makeCommandListDisks(); +DB::CommandPtr makeCommandList(); +DB::CommandPtr makeCommandChangeDirectory(); +DB::CommandPtr makeCommandLink(); DB::CommandPtr makeCommandMove(); DB::CommandPtr makeCommandRead(); DB::CommandPtr makeCommandRemove(); DB::CommandPtr makeCommandWrite(); DB::CommandPtr makeCommandMkDir(); +DB::CommandPtr makeCommandSwitchDisk(); +DB::CommandPtr makeCommandGetCurrentDiskAndPath(); +DB::CommandPtr makeCommandHelp(const DisksApp & disks_app); +DB::CommandPtr makeCommandTouch(); +#ifdef CLICKHOUSE_CLOUD DB::CommandPtr makeCommandPackedIO(); +#endif +} diff --git a/programs/disks/ICommand_fwd.h b/programs/disks/ICommand_fwd.h new file mode 100644 index 00000000000..84310b4a18d --- /dev/null +++ b/programs/disks/ICommand_fwd.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ +class ICommand; + +using CommandPtr = std::shared_ptr; +} diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 967920557e1..44c2daa33ad 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -264,6 +264,35 @@ HTTPContextPtr httpContext() return std::make_shared(Context::getGlobalContextInstance()); } +String getKeeperPath(Poco::Util::LayeredConfiguration & config) +{ + String path; + if 
(config.has("keeper_server.storage_path")) + { + path = config.getString("keeper_server.storage_path"); + } + else if (config.has("keeper_server.log_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.log_storage_path")).parent_path(); + } + else if (config.has("keeper_server.snapshot_storage_path")) + { + path = std::filesystem::path(config.getString("keeper_server.snapshot_storage_path")).parent_path(); + } + else if (std::filesystem::is_directory(std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", + KEEPER_DEFAULT_PATH, String{std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); + } + else + { + path = KEEPER_DEFAULT_PATH; + } + return path; +} + + } int Keeper::main(const std::vector & /*args*/) @@ -316,31 +345,7 @@ try updateMemorySoftLimitInConfig(config()); - std::string path; - - if (config().has("keeper_server.storage_path")) - { - path = config().getString("keeper_server.storage_path"); - } - else if (config().has("keeper_server.log_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path(); - } - else if (config().has("keeper_server.snapshot_storage_path")) - { - path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path(); - } - else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination")) - { - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "By default 'keeper_server.storage_path' could be assigned to {}, but the directory {} already exists. 
Please specify 'keeper_server.storage_path' in the keeper configuration explicitly", - KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); - } - else - { - path = KEEPER_DEFAULT_PATH; - } - + std::string path = getKeeperPath(config()); std::filesystem::create_directories(path); /// Check that the process user id matches the owner of the data. @@ -554,7 +559,7 @@ try auto main_config_reloader = std::make_unique( config_path, extra_paths, - config().getString("path", KEEPER_DEFAULT_PATH), + getKeeperPath(config()), std::move(unused_cache), unused_event, [&](ConfigurationPtr config, bool /* initial_loading */) diff --git a/programs/keeper/conf.d/local.yaml b/programs/keeper/conf.d/local.yaml new file mode 100644 index 00000000000..722e90e374a --- /dev/null +++ b/programs/keeper/conf.d/local.yaml @@ -0,0 +1,9 @@ +logger: + log: + "@remove": remove + errorlog: + "@remove": remove + console: 1 +keeper_server: + log_storage_path: ./logs + snapshot_storage_path: ./snapshots diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 066877b1cec..3b88bb36954 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -587,6 +587,54 @@ static void sanityChecks(Server & server) } } +void loadStartupScripts(const Poco::Util::AbstractConfiguration & config, ContextMutablePtr context, Poco::Logger * log) +{ + try + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys("startup_scripts", keys); + + SetResultDetailsFunc callback; + for (const auto & key : keys) + { + std::string full_prefix = "startup_scripts." 
+ key; + + if (config.has(full_prefix + ".condition")) + { + auto condition = config.getString(full_prefix + ".condition"); + auto condition_read_buffer = ReadBufferFromString(condition); + auto condition_write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Checking startup query condition `{}`", condition); + executeQuery(condition_read_buffer, condition_write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + + auto result = condition_write_buffer.str(); + + if (result != "1\n" && result != "true\n") + { + if (result != "0\n" && result != "false\n") + context->addWarningMessage(fmt::format("The condition query returned `{}`, which can't be interpreted as a boolean (`0`, `false`, `1`, `true`). Will skip this query.", result)); + + continue; + } + + LOG_DEBUG(log, "Condition is true, will execute the query next"); + } + + auto query = config.getString(full_prefix + ".query"); + auto read_buffer = ReadBufferFromString(query); + auto write_buffer = WriteBufferFromOwnString(); + + LOG_DEBUG(log, "Executing query `{}`", query); + executeQuery(read_buffer, write_buffer, true, context, callback, QueryFlags{ .internal = true }, std::nullopt, {}); + } + } + catch (...) 
+ { + tryLogCurrentException(log, "Failed to parse startup scripts file"); + } +} + static void initializeAzureSDKLogger( [[ maybe_unused ]] const ServerSettings & server_settings, [[ maybe_unused ]] int server_logs_level) @@ -626,6 +674,28 @@ static void initializeAzureSDKLogger( #endif } +#if defined(SANITIZER) +static std::vector getSanitizerNames() +{ + std::vector names; + +#if defined(ADDRESS_SANITIZER) + names.push_back("address"); +#endif +#if defined(THREAD_SANITIZER) + names.push_back("thread"); +#endif +#if defined(MEMORY_SANITIZER) + names.push_back("memory"); +#endif +#if defined(UNDEFINED_BEHAVIOR_SANITIZER) + names.push_back("undefined behavior"); +#endif + + return names; +} +#endif + int Server::main(const std::vector & /*args*/) try { @@ -716,7 +786,17 @@ try global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable."); #if defined(SANITIZER) - global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); + auto sanitizers = getSanitizerNames(); + + String log_message; + if (sanitizers.empty()) + log_message = "sanitizer"; + else if (sanitizers.size() == 1) + log_message = fmt::format("{} sanitizer", sanitizers.front()); + else + log_message = fmt::format("sanitizers ({})", fmt::join(sanitizers, ", ")); + + global_context->addWarningMessage(fmt::format("Server was built with {}. It will work slowly.", log_message)); #endif #if defined(SANITIZE_COVERAGE) || WITH_COVERAGE @@ -1958,6 +2038,11 @@ try /// otherwise there is a race condition between the system database initialization /// and creation of new tables in the database. waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks); + + /// Startup scripts can depend on the system log tables. 
+ if (config().has("startup_scripts") && !server_settings.prepare_system_log_tables_on_startup.changed) + global_context->setServerSetting("prepare_system_log_tables_on_startup", true); + /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); @@ -2106,6 +2191,9 @@ try load_metadata_tasks.clear(); load_metadata_tasks.shrink_to_fit(); + if (config().has("startup_scripts")) + loadStartupScripts(config(), global_context, log); + { std::lock_guard lock(servers_lock); for (auto & server : servers) diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index f52d724f346..e5ee8a0d0b2 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -33,7 +33,7 @@ public: return; auto * function_node = node->as(); - if (!function_node || !function_node->isAggregateFunction()) + if (!function_node || !function_node->isAggregateFunction() || !function_node->getResultType()->equals(DataTypeUInt64())) return; auto function_name = function_node->getFunctionName(); diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 51cbe6f3d6f..4effc3adafa 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -187,9 +187,9 @@ void HedgedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; + const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas(); - if (offset_states.size() > 1 && enable_sample_offset_parallel_processing) + if (offset_states.size() > 1 && enable_offset_parallel_processing) { modified_settings.parallel_replicas_count = offset_states.size(); modified_settings.parallel_replica_offset = 
fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; @@ -201,7 +201,8 @@ void HedgedConnections::sendQuery( /// all servers involved in the distributed query processing. modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); - replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); + replica.connection->sendQuery( + timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {}); replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout); replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout); }; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 99bdd706d8b..bcef286ecbc 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -23,8 +24,8 @@ namespace ErrorCodes } -MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) +MultiplexedConnections::MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()) { connection.setThrottler(throttler); @@ -36,9 +37,9 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se } -MultiplexedConnections::MultiplexedConnections(std::shared_ptr connection_ptr_, const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) - , connection_ptr(connection_ptr_) +MultiplexedConnections::MultiplexedConnections( + std::shared_ptr connection_ptr_, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), 
settings(context->getSettingsRef()), connection_ptr(connection_ptr_) { connection_ptr->setThrottler(throttler); @@ -50,9 +51,8 @@ MultiplexedConnections::MultiplexedConnections(std::shared_ptr conne } MultiplexedConnections::MultiplexedConnections( - std::vector && connections, - const Settings & settings_, const ThrottlerPtr & throttler) - : settings(settings_) + std::vector && connections, ContextPtr context_, const ThrottlerPtr & throttler) + : context(std::move(context_)), settings(context->getSettingsRef()) { /// If we didn't get any connections from pool and getMany() did not throw exceptions, this means that /// `skip_unavailable_shards` was set. Then just return. @@ -156,18 +156,18 @@ void MultiplexedConnections::sendQuery( /// all servers involved in the distributed query processing. modified_settings.set("allow_experimental_analyzer", static_cast(modified_settings.allow_experimental_analyzer)); - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; + const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas(); size_t num_replicas = replica_states.size(); if (num_replicas > 1) { - if (enable_sample_offset_parallel_processing) + if (enable_offset_parallel_processing) /// Use multiple replicas for parallel query processing. 
modified_settings.parallel_replicas_count = num_replicas; for (size_t i = 0; i < num_replicas; ++i) { - if (enable_sample_offset_parallel_processing) + if (enable_offset_parallel_processing) modified_settings.parallel_replica_offset = i; replica_states[i].connection->sendQuery( diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h index 9f7b47e0562..dec32e52d4f 100644 --- a/src/Client/MultiplexedConnections.h +++ b/src/Client/MultiplexedConnections.h @@ -10,7 +10,6 @@ namespace DB { - /** To retrieve data directly from multiple replicas (connections) from one shard * within a single thread. As a degenerate case, it can also work with one connection. * It is assumed that all functions except sendCancel are always executed in one thread. @@ -21,14 +20,12 @@ class MultiplexedConnections final : public IConnections { public: /// Accepts ready connection. - MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler_); /// Accepts ready connection and keep it alive before drain - MultiplexedConnections(std::shared_ptr connection_, const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(std::shared_ptr connection_, ContextPtr context_, const ThrottlerPtr & throttler_); /// Accepts a vector of connections to replicas of one shard already taken from pool. - MultiplexedConnections( - std::vector && connections, - const Settings & settings_, const ThrottlerPtr & throttler_); + MultiplexedConnections(std::vector && connections, ContextPtr context_, const ThrottlerPtr & throttler_); void sendScalarsData(Scalars & data) override; void sendExternalTablesData(std::vector & data) override; @@ -86,6 +83,7 @@ private: /// Mark the replica as invalid. 
void invalidateReplica(ReplicaState & replica_state); + ContextPtr context; const Settings & settings; /// The current number of valid connections to the replicas of this shard. diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index ded56b60e64..90ef974010c 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1093,10 +1093,4 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } -void ColumnObject::updateHashFast(SipHash & hash) const -{ - for (const auto & entry : subcolumns) - for (auto & part : entry->data.data) - part->updateHashFast(hash); -} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index b1b8827622f..e2936b27994 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -242,7 +242,7 @@ public: const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash & hash) const override; + void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 2159495b68f..f262a8676b7 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -711,7 +711,13 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c ColumnPtr ColumnTuple::compress() const { if (columns.empty()) - return Ptr(); + { + return ColumnCompressed::create(size(), 0, + [n = column_length] + { + return ColumnTuple::create(n); + }); + } size_t byte_size = 0; Columns compressed; diff --git a/src/Common/BinStringDecodeHelper.h 
b/src/Common/BinStringDecodeHelper.h index df3e014cfad..03c175fd37f 100644 --- a/src/Common/BinStringDecodeHelper.h +++ b/src/Common/BinStringDecodeHelper.h @@ -5,7 +5,7 @@ namespace DB { -static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size = 2) +static void inline hexStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if ((end - pos) & 1) { @@ -23,7 +23,7 @@ static void inline hexStringDecode(const char * pos, const char * end, char *& o ++out; } -static void inline binStringDecode(const char * pos, const char * end, char *& out) +static void inline binStringDecode(const char * pos, const char * end, char *& out, size_t word_size) { if (pos == end) { @@ -53,7 +53,7 @@ static void inline binStringDecode(const char * pos, const char * end, char *& o ++out; } - assert((end - pos) % 8 == 0); + chassert((end - pos) % word_size == 0); while (end - pos != 0) { diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h deleted file mode 100644 index 97c0c3fbc06..00000000000 --- a/src/Common/CollectionOfDerived.h +++ /dev/null @@ -1,184 +0,0 @@ -#pragma once - -#include - -#include - -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -/* This is a collections of objects derived from ItemBase. -* Collection contains no more than one instance for each derived type. -* The derived type is used to access the instance. 
-*/ - -template -class CollectionOfDerivedItems -{ -public: - using Self = CollectionOfDerivedItems; - using ItemPtr = std::shared_ptr; - -private: - struct Rec - { - std::type_index type_idx; - ItemPtr ptr; - - bool operator<(const Rec & other) const - { - return type_idx < other.type_idx; - } - - bool operator<(const std::type_index & value) const - { - return type_idx < value; - } - - bool operator==(const Rec & other) const - { - return type_idx == other.type_idx; - } - }; - using Records = std::vector; - -public: - void swap(Self & other) noexcept - { - records.swap(other.records); - } - - void clear() - { - records.clear(); - } - - bool empty() const - { - return records.empty(); - } - - size_t size() const - { - return records.size(); - } - - Self clone() const - { - Self result; - result.records.reserve(records.size()); - for (const auto & rec : records) - result.records.emplace_back(rec.type_idx, rec.ptr->clone()); - return result; - } - - void append(Self && other) - { - auto middle_idx = records.size(); - std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); - std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); - chassert(isUniqTypes()); - } - - template - void add(std::shared_ptr info) - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - return addImpl(std::type_index(typeid(T)), std::move(info)); - } - - template - std::shared_ptr get() const - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = std::dynamic_pointer_cast(it->ptr); - chassert(cast); - return cast; - } - - template - std::shared_ptr extract() - { - static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); - auto it = getImpl(std::type_index(typeid(T))); - if (it == records.cend()) - return nullptr; - auto cast = 
std::dynamic_pointer_cast(it->ptr); - chassert(cast); - - records.erase(it); - return cast; - } - - std::string debug() const - { - std::string result; - - for (auto & rec : records) - { - result.append(rec.type_idx.name()); - result.append(" "); - } - - return result; - } - -private: - bool isUniqTypes() const - { - auto uniq_it = std::adjacent_find(records.begin(), records.end()); - - return uniq_it == records.end(); - } - - void addImpl(std::type_index type_idx, ItemPtr item) - { - auto it = std::lower_bound(records.begin(), records.end(), type_idx); - - if (it == records.end()) - { - records.emplace_back(type_idx, item); - return; - } - - if (it->type_idx == type_idx) - throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); - - - records.emplace(it, type_idx, item); - - chassert(isUniqTypes()); - } - - Records::const_iterator getImpl(std::type_index type_idx) const - { - auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); - - if (it == records.cend()) - return records.cend(); - - if (it->type_idx != type_idx) - return records.cend(); - - return it; - } - - Records records; -}; - -} diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 8516a88c7af..7c97e73f278 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -267,7 +267,7 @@ M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \ M(S3Requests, "S3 requests count") \ M(KeeperAliveConnections, "Number of alive connections") \ - M(KeeperOutstandingRequets, "Number of outstanding requests") \ + M(KeeperOutstandingRequests, "Number of outstanding requests") \ M(ThreadsInOvercommitTracker, "Number of waiting threads inside of OvercommitTracker") \ M(IOUringPendingEvents, "Number of io_uring SQEs waiting to be submitted") \ M(IOUringInFlightEvents, "Number of io_uring SQEs in flight") \ diff --git a/src/Common/examples/CMakeLists.txt 
b/src/Common/examples/CMakeLists.txt index 410576c2b4a..c133e9f5617 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -31,8 +31,10 @@ target_link_libraries (arena_with_free_lists PRIVATE dbms) clickhouse_add_executable (lru_hash_map_perf lru_hash_map_perf.cpp) target_link_libraries (lru_hash_map_perf PRIVATE dbms) -clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp) -target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) +if (OS_LINUX) + clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp) + target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io) +endif() clickhouse_add_executable (array_cache array_cache.cpp) target_link_libraries (array_cache PRIVATE clickhouse_common_io) diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index b4241235cc7..7589e3393be 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -372,7 +372,7 @@ extern const std::vector keeper_profile_events M(AsynchronousReadWait) \ M(S3Requests) \ M(KeeperAliveConnections) \ - M(KeeperOutstandingRequets) \ + M(KeeperOutstandingRequests) \ M(ThreadsInOvercommitTracker) \ M(IOUringPendingEvents) \ M(IOUringInFlightEvents) \ diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index f36b1ef151f..6f57fa6d2e2 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -28,7 +28,7 @@ namespace CurrentMetrics { extern const Metric KeeperAliveConnections; - extern const Metric KeeperOutstandingRequets; + extern const Metric KeeperOutstandingRequests; } namespace ProfileEvents @@ -139,7 +139,7 @@ void KeeperDispatcher::requestThread() { if (requests_queue->tryPop(request, max_wait)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); if 
(shutdown_called) break; @@ -171,7 +171,7 @@ void KeeperDispatcher::requestThread() /// Trying to get batch requests as fast as possible if (requests_queue->tryPop(request)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); /// Don't append read request into batch, we have to process them separately if (!coordination_settings->quorum_reads && request.request->isReadRequest()) { @@ -419,7 +419,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ { throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push request to queue within operation timeout"); } - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); return true; } @@ -543,7 +543,7 @@ void KeeperDispatcher::shutdown() /// Set session expired for all pending requests while (requests_queue && requests_queue->tryPop(request_for_session)) { - CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequests); auto response = request_for_session.request->makeResponse(); response->error = Coordination::Error::ZSESSIONEXPIRED; setResponse(request_for_session.session_id, response); @@ -670,7 +670,7 @@ void KeeperDispatcher::sessionCleanerTask() }; if (!requests_queue->push(std::move(request_info))) LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions"); - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); /// Remove session from registered sessions finishSession(dead_session); @@ -794,7 +794,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) /// Push new session request to queue if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms)) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot push session id request to queue within 
session timeout"); - CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); + CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequests); if (future.wait_for(std::chrono::milliseconds(session_timeout_ms)) != std::future_status::ready) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Cannot receive session id within session timeout"); diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index d473810bcb8..ad29d59e4a3 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -153,6 +153,7 @@ namespace DB M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ + M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \ M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. 
PODArray allocations)", 0) \ M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a6916e0fbc4..cf7e7a3af39 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -36,7 +36,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ @@ -167,9 +167,6 @@ class IColumn; M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \ M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. 
It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \ \ - M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ - M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ - \ M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \ M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \ M(Bool, alter_move_to_space_execute_async, false, "Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously", 0) \ @@ -205,21 +202,6 @@ class IColumn; M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \ \ M(NonZeroUInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled. Should be always greater than 0", 0) \ - M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ - M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. 
This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ - M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ - M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ - M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \ - M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \ - \ - M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ - M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ - M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ - M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. 
This will be applied only for remote replicas.", 0) \ - M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ - M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ - M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ - M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 
3) Table does not exist on the shard.", 0) \ \ @@ -251,8 +233,6 @@ class IColumn; M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \ M(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, "Split parts ranges into intersecting and non intersecting during FINAL optimization", 0) \ M(Bool, split_intersecting_parts_ranges_into_layers_final, true, "Split intersecting parts ranges into layers during FINAL optimization", 0) \ - M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ - M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ \ M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \ M(Bool, mysql_map_string_to_text_in_show_columns, true, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Has an effect only when the connection is made through the MySQL wire protocol.", 0) \ @@ -341,7 +321,6 @@ class IColumn; M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ - M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \ \ M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. 
If empty, query without strictness will throw exception.", 0) \ M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \ @@ -392,7 +371,6 @@ class IColumn; M(Bool, empty_result_for_aggregation_by_constant_keys_on_empty_set, true, "Return empty result when aggregating by constant keys on empty set.", 0) \ M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \ M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \ - M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ M(Bool, enable_deflate_qpl_codec, false, "Enable/disable the DEFLATE_QPL codec.", 0) \ M(Bool, enable_zstd_qat_codec, false, "Enable/disable the ZSTD_QAT codec.", 0) \ M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. 
Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ @@ -402,8 +380,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \ - M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ + \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. 
It's also useful to reduce the need to access the external sources joining external tables.", 0) \ M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \ \ @@ -553,6 +530,7 @@ class IColumn; M(Bool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_read_in_window_order, true, "Enable ORDER BY optimization in window clause for reading data in corresponding order in MergeTree tables.", 0) \ M(Bool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \ + M(Bool, read_in_order_use_buffering, true, "Use buffering before merging while reading in order of primary key. It increases the parallelism of query execution", 0) \ M(UInt64, aggregation_in_order_max_block_bytes, 50000000, "Maximal size of block in bytes accumulated during aggregation in order of primary key. Lower block size allows to parallelize more final merge stage of aggregation.", 0) \ M(UInt64, read_in_order_two_level_merge_threshold, 100, "Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.", 0) \ M(Bool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. 
Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \ @@ -593,13 +571,6 @@ class IColumn; M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \ M(UInt64, distributed_replica_max_ignored_errors, 0, "Number of errors that will be ignored while choosing replicas", 0) \ \ - M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \ - M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ - M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ - M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. 
Not mature enough.", 0) \ - M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ - M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ - M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ @@ -634,12 +605,11 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. 
It guarantees correctness, because these features can't work together.", 0) \ + M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ - M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ - M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). 
If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ @@ -656,8 +626,6 @@ class IColumn; M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \ M(Bool, cast_ipv4_ipv6_default_on_conversion_error, false, "CAST operator into IPv4, CAST operator into IPV6 type, toIPv4, toIPv6 functions will return default value instead of throwing exception on conversion error.", 0) \ M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \ - M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ - M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "When querying system.events or system.metrics tables, include all metrics, even with zero values.", 0) \ M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Defines how MySQL types are converted to corresponding ClickHouse types. A comma separated list in any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. decimal: convert NUMERIC and DECIMAL types to Decimal when precision allows it. datetime64: convert DATETIME and TIMESTAMP types to DateTime64 instead of DateTime when precision is not 0. date2Date32: convert DATE to Date32 instead of Date. Takes precedence over date2String. date2String: convert DATE to String instead of Date. Overridden by datetime64.", 0) \ M(Bool, optimize_trivial_insert_select, false, "Optimize trivial 'INSERT INTO table SELECT ... 
FROM TABLES' query", 0) \ @@ -719,9 +687,6 @@ class IColumn; M(Bool, force_aggregate_partitions_independently, false, "Force the use of optimization when it is applicable, but heuristics decided not to use it", 0) \ M(UInt64, max_number_of_partitions_for_independent_aggregation, 128, "Maximal number of partitions in table to apply optimization", 0) \ M(Float, min_hit_rate_to_use_consecutive_keys_optimization, 0.5, "Minimal hit rate of a cache which is used for consecutive keys optimization in aggregation to keep it enabled", 0) \ - /** Experimental feature for moving data between shards. */ \ - \ - M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ \ M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \ M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \ @@ -755,6 +720,7 @@ class IColumn; M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ M(Bool, optimize_group_by_constant_keys, true, "Optimize GROUP BY when all keys in block are constant", 0) \ M(Bool, legacy_column_name_of_tuple_literal, false, "List all names of element of large tuple literals in their column names instead of hash. This settings exists only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher.", 0) \ + M(Bool, enable_named_columns_in_function_tuple, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers.", 0) \ \ M(Bool, query_plan_enable_optimizations, true, "Globally enable/disable query optimization at the query plan level", 0) \ M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. 
If limit reached, throw exception", 0) \ @@ -762,6 +728,7 @@ class IColumn; M(Bool, query_plan_push_down_limit, true, "Allow to move LIMITs down in the query plan", 0) \ M(Bool, query_plan_split_filter, true, "Allow to split filters in the query plan", 0) \ M(Bool, query_plan_merge_expressions, true, "Allow to merge expressions in the query plan", 0) \ + M(Bool, query_plan_merge_filters, false, "Allow to merge filters in the query plan", 0) \ M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \ M(Bool, query_plan_convert_outer_join_to_inner_join, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values", 0) \ M(Bool, query_plan_optimize_prewhere, true, "Allow to push down filter to PREWHERE expression for supported storages", 0) \ @@ -903,34 +870,11 @@ class IColumn; M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \ M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \ - \ - /** Experimental functions */ \ - M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. 
Disabled by default, because this feature is experimental", 0) \ - M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ - M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ - M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ - M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ - M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ - M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ - M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ - M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ - M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. 
Helps to prevent memory overflows in ANN search indexes.", 0) \ - M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \ - M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ - M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ - M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \ - M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ - M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ - M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ - M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ - M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ - M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ - M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. 
Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \ M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ - M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0)\ + M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ M(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, "Allows to set a default value for SQL SECURITY option when creating a materialized view.", 0) \ M(String, default_view_definer, "CURRENT_USER", "Allows to set a default value for DEFINER option when creating view.", 0) \ @@ -940,6 +884,81 @@ class IColumn; M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ + M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. 
For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ + M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ + M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ + \ + \ + /* ###################################### */ \ + /* ######## EXPERIMENTAL FEATURES ####### */ \ + /* ###################################### */ \ + M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ + M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ + M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ + M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ + M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ + M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ + M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ + M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. 
Disabled by default because this feature is experimental", 0) \ + M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ + M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ + M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \ + M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \ + M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ + M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \ + M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \ + M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \ + M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \ + M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. 
The server default timezone if empty.", 0) \ + \ + M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \ + M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \ + \ + /* Parallel replicas */ \ + M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ + M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ + M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ + M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ + M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \ + M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. 
A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \ + M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \ + M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ + M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \ + M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ + M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ + M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ + M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ + M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. 
Not recommended to change until you're absolutely sure in what you're doing", 0) \ + \ + M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ + M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ + \ + M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \ + \ + /* Analyzer: It's not experimental anymore (WIP) */ \ + M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", IMPORTANT) \ + M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \ + \ + M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \ + M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \ + M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \ + \ + M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. 
Not mature enough.", 0) \ + M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \ + M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \ + M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ + \ + M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ + M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ + \ + M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ + M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ + \ + /** Experimental feature for moving data between shards. */ \ + M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \ + + /** End of experimental features */ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. @@ -953,7 +972,6 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. 
*/ \ - MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ @@ -1047,6 +1065,7 @@ class IColumn; M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \ M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \ + M(Bool, input_format_orc_read_use_writer_time_zone, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT.", 0) \ M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \ M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \ M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index e7838ef9e9c..c1def91f7b8 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -58,13 +58,17 @@ String ClickHouseVersion::toString() const static std::initializer_list> settings_changes_history_initializer = { {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + {"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"}, {"optimize_functions_to_subcolumns", false, true, 
"Enable optimization by default"}, + {"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."}, {"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."}, {"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."}, + {"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."}, {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."}, {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"}, {"database_replicated_allow_replicated_engine_arguments", 1, 0, "Don't allow explicit arguments by default"}, {"database_replicated_allow_explicit_uuid", 0, 0, "Added a new setting to disallow explicitly specifying table UUID"}, + {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"}, {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"}, {"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"}, {"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"}, diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index c920e69c13b..5302cdb18f9 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -2,9 +2,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -110,28 +112,58 @@ std::unique_ptr 
DataTypeDynamic::getDynamicSubcolumnDa } /// Extract nested subcolumn of requested dynamic subcolumn if needed. - if (!subcolumn_nested_name.empty()) + /// If requested subcolumn is null map, it's processed separately as there is no Nullable type yet. + bool is_null_map_subcolumn = subcolumn_nested_name == "null"; + if (is_null_map_subcolumn) + { + res->type = std::make_shared(); + } + else if (!subcolumn_nested_name.empty()) { res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null); if (!res) return nullptr; } - res->serialization = std::make_shared(res->serialization, subcolumn_type->getName()); - res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + res->serialization = std::make_shared(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn); + /// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()). + bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality(); + if (!is_null_map_subcolumn && make_subcolumn_nullable) + res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + if (data.column) { if (discriminator) { - /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to + /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to /// create full subcolumn from variant according to discriminators. 
const auto & variant_column = assert_cast(*data.column).getVariantColumn(); - auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator)); - res->column = creator.create(res->column); + std::unique_ptr creator; + if (is_null_map_subcolumn) + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator)); + else + creator = std::make_unique( + variant_column.getLocalDiscriminatorsPtr(), + "", + *discriminator, + variant_column.localDiscriminatorByGlobal(*discriminator), + make_subcolumn_nullable); + res->column = creator->create(res->column); + } + /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. + else if (is_null_map_subcolumn) + { + /// Fill null map with 1 when there is no such Dynamic subcolumn. + auto column = ColumnUInt8::create(); + assert_cast(*column).getData().resize_fill(data.column->size(), 1); + res->column = std::move(column); } else { - /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. 
auto column = res->type->createColumn(); column->insertManyDefaults(data.column->size()); res->column = std::move(column); diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 1c9715bbf53..1cb64b65d3a 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -173,7 +173,7 @@ bool IDataType::hasDynamicSubcolumns() const auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data) { - has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData(); + has_dynamic_subcolumns |= subcolumn_data.type && subcolumn_data.type->hasDynamicSubcolumnsData(); }; forEachSubcolumn(callback, data); return has_dynamic_subcolumns; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index bbb1d1a6cd1..7642a6619b3 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -64,6 +64,9 @@ String ISerialization::Substream::toString() const if (type == VariantElement) return fmt::format("VariantElement({})", variant_element_name); + if (type == VariantElementNullMap) + return fmt::format("VariantElementNullMap({}.null)", variant_element_name); + return String(magic_enum::enum_name(type)); } @@ -195,6 +198,8 @@ String getNameForSubstreamPath( stream_name += ".variant_offsets"; else if (it->type == Substream::VariantElement) stream_name += "." + it->variant_element_name; + else if (it->type == Substream::VariantElementNullMap) + stream_name += "." 
+ it->variant_element_name + ".null"; else if (it->type == SubstreamType::DynamicStructure) stream_name += ".dynamic_structure"; } @@ -395,7 +400,8 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref return path[last_elem].type == Substream::NullMap || path[last_elem].type == Substream::TupleElement || path[last_elem].type == Substream::ArraySizes - || path[last_elem].type == Substream::VariantElement; + || path[last_elem].type == Substream::VariantElement + || path[last_elem].type == Substream::VariantElementNullMap; } ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 914ff9cf4a2..6007eca94d4 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -184,6 +184,7 @@ public: VariantOffsets, VariantElements, VariantElement, + VariantElementNullMap, DynamicData, DynamicStructure, @@ -256,6 +257,8 @@ public: bool position_independent_encoding = true; + bool use_compact_variant_discriminators_serialization = false; + enum class DynamicStatisticsMode { NONE, /// Don't write statistics. 
@@ -434,6 +437,9 @@ protected: template State * checkAndGetState(const StatePtr & state) const; + template + static State * checkAndGetState(const StatePtr & state, const ISerialization * serialization); + [[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const; }; @@ -444,10 +450,16 @@ using SubstreamType = ISerialization::Substream::Type; template State * ISerialization::checkAndGetState(const StatePtr & state) const +{ + return checkAndGetState(state, this); +} + +template +State * ISerialization::checkAndGetState(const StatePtr & state, const ISerialization * serialization) { if (!state) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Got empty state for {}", demangle(typeid(*this).name())); + "Got empty state for {}", demangle(typeid(*serialization).name())); auto * state_concrete = typeid_cast(state.get()); if (!state_concrete) @@ -455,7 +467,7 @@ State * ISerialization::checkAndGetState(const StatePtr & state) const auto & state_ref = *state; throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid State for {}. 
Expected: {}, got {}", - demangle(typeid(*this).name()), + demangle(typeid(*serialization).name()), demangle(typeid(State).name()), demangle(typeid(state_ref).name())); } diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index dafd6d663b0..211f0ac9377 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -77,7 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) { settings.path.push_back(Substream::DynamicData); - dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); + if (is_null_map_subcolumn) + dynamic_element_state->variant_serialization = std::make_shared(dynamic_element_name, *global_discr); + else + dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } @@ -98,7 +102,16 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( SubstreamsCache * cache) const { if (!state) + { + if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } + return; + } auto * dynamic_element_state = checkAndGetState(state); @@ -108,6 +121,12 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, 
dynamic_element_state->variant_element_state, cache); settings.path.pop_back(); } + else if (is_null_map_subcolumn) + { + auto mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + } else { auto mutable_column = result_column->assumeMutable(); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h index 2ddc3324139..127d14a55e0 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.h +++ b/src/DataTypes/Serializations/SerializationDynamicElement.h @@ -13,11 +13,11 @@ private: /// To be able to deserialize Dynamic element as a subcolumn /// we need its type name and global discriminator. String dynamic_element_name; + bool is_null_map_subcolumn; public: - SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_) - : SerializationWrapper(nested_) - , dynamic_element_name(dynamic_element_name_) + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false) + : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_) { } diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index b386fd8ab45..e4d71e84cc7 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -30,12 +31,18 @@ namespace ErrorCodes struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState { - std::vector states; + explicit SerializeBinaryBulkStateVariant(UInt64 mode) : discriminators_mode(mode) + { + } + + SerializationVariant::DiscriminatorsSerializationMode discriminators_mode; + std::vector 
variant_states; }; struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState { - std::vector states; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; + std::vector variant_states; }; void SerializationVariant::enumerateStreams( @@ -65,13 +72,19 @@ void SerializationVariant::enumerateStreams( for (size_t i = 0; i < variants.size(); ++i) { - settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); + DataTypePtr type = type_variant ? type_variant->getVariant(i) : nullptr; + settings.path.back().creator = std::make_shared( + local_discriminators, + variant_names[i], + i, + column_variant ? column_variant->localDiscriminatorByGlobal(i) : i, + !type || type->canBeInsideNullable() || type->lowCardinality()); auto variant_data = SubstreamData(variants[i]) - .withType(type_variant ? type_variant->getVariant(i) : nullptr) + .withType(type) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr); + .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->variant_states[i] : nullptr); addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; @@ -79,6 +92,24 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } + /// Variant subcolumns like variant.Type have type Nullable(Type), so we want to support reading null map subcolumn from it: variant.Type.null. + /// Nullable column is created during deserialization of a variant subcolumn according to the discriminators, so we don't have actual Nullable + /// serialization with null map subcolumn. 
To be able to read null map subcolumn from the variant subcolumn we use special serialization + /// SerializationVariantElementNullMap. + auto null_map_data = SubstreamData(std::make_shared>()) + .withType(type_variant ? std::make_shared() : nullptr) + .withColumn(column_variant ? ColumnUInt8::create() : nullptr); + + for (size_t i = 0; i < variants.size(); ++i) + { + settings.path.back().creator = std::make_shared(local_discriminators, variant_names[i], i, column_variant ? column_variant->localDiscriminatorByGlobal(i) : i); + settings.path.push_back(Substream::VariantElementNullMap); + settings.path.back().variant_element_name = variant_names[i]; + settings.path.back().data = null_map_data; + callback(settings.path); + settings.path.pop_back(); + } + settings.path.pop_back(); } @@ -87,17 +118,26 @@ void SerializationVariant::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - const ColumnVariant & col = assert_cast(column); + settings.path.push_back(Substream::VariantDiscriminators); + auto * discriminators_stream = settings.getter(settings.path); + settings.path.pop_back(); - auto variant_state = std::make_shared(); - variant_state->states.resize(variants.size()); + if (!discriminators_stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for VariantDiscriminators in SerializationVariant::serializeBinaryBulkStatePrefix"); + + UInt64 mode = settings.use_compact_variant_discriminators_serialization ? 
DiscriminatorsSerializationMode::COMPACT : DiscriminatorsSerializationMode::BASIC; + writeBinaryLittleEndian(mode, *discriminators_stream); + + const ColumnVariant & col = assert_cast(column); + auto variant_state = std::make_shared(mode); + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkStatePrefix(col.getVariantByGlobalDiscriminator(i), settings, variant_state->variant_states[i]); settings.path.pop_back(); } @@ -116,7 +156,7 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkStateSuffix(settings, variant_state->variant_states[i]); settings.path.pop_back(); } settings.path.pop_back(); @@ -128,14 +168,19 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { + DeserializeBinaryBulkStatePtr discriminators_state = deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + auto variant_state = std::make_shared(); - variant_state->states.resize(variants.size()); + variant_state->discriminators_state = discriminators_state; + variant_state->variant_states.resize(variants.size()); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache); + variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->variant_states[i], cache); 
settings.path.pop_back(); } @@ -143,6 +188,29 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( state = std::move(variant_state); } +ISerialization::DeserializeBinaryBulkStatePtr SerializationVariant::deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache) +{ + settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStatePtr discriminators_state = nullptr; + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + discriminators_state = cached_state; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + UInt64 mode; + readBinaryLittleEndian(mode, *discriminators_stream); + discriminators_state = std::make_shared(mode); + addToSubstreamsDeserializeStatesCache(cache, settings.path, discriminators_state); + } + + settings.path.pop_back(); + return discriminators_state; +} + void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( const IColumn & column, @@ -165,13 +233,71 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian auto * variant_state = checkAndGetState(state); - /// If offset = 0 and limit == col.size() or we have only NULLs, we don't need to calculate + /// Don't write anything if column is empty. + if (limit == 0) + return; + + /// Write number of rows in this granule in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeVarUInt(UInt64(limit), *discriminators_stream); + + /// If column has only one none empty discriminators and no NULLs we don't need to + /// calculate limits for variants and use provided offset/limit. 
+ if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) + { + auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); + + /// In compact mode write the format of the granule and single non-empty discriminator. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + /// For basic mode just serialize this discriminator limit times. + else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); + } + + settings.path.push_back(Substream::VariantElements); + addVariantElementToPath(settings.path, non_empty_global_discr); + /// We can use the same offset/limit as for whole Variant column + variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->variant_states[non_empty_global_discr]); + variants_statistics[variant_names[non_empty_global_discr]] += limit; + settings.path.pop_back(); + settings.path.pop_back(); + return; + } + /// If column has only NULLs, just serialize NULL discriminators. + else if (col.hasOnlyNulls()) + { + /// In compact mode write single NULL_DISCRIMINATOR. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// In basic mode write NULL_DISCRIMINATOR limit times. 
+ else + { + for (size_t i = 0; i < limit; ++i) + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + return; + } + + /// If offset = 0 and limit == col.size() we don't need to calculate /// offsets and limits for variants and need to just serialize whole columns. - if ((offset == 0 && limit == col.size()) || col.hasOnlyNulls()) + if ((offset == 0 && limit == col.size())) { /// First, serialize discriminators. - /// If we have only NULLs or local and global discriminators are the same, just serialize the column as is. - if (col.hasOnlyNulls() || col.hasGlobalVariantsOrder()) + /// Here we are sure that column contains different discriminators, use plain granule format in compact mode. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::COMPACT) + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + + /// If local and global discriminators are the same, just serialize the column as is. + if (col.hasGlobalVariantsOrder()) { SerializationNumber().serializeBinaryBulk(col.getLocalDiscriminatorsColumn(), *discriminators_stream, offset, limit); } @@ -188,7 +314,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]); + variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->variant_states[i]); variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size(); settings.path.pop_back(); } @@ -196,36 +322,16 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian return; } - /// If we have only one non empty variant and no NULLs, we can use the same limit offset for this variant. 
- if (auto non_empty_local_discr = col.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - /// First, serialize discriminators. - /// We know that all discriminators are the same, so we just need to serialize this discriminator limit times. - auto non_empty_global_discr = col.globalDiscriminatorByLocal(*non_empty_local_discr); - for (size_t i = 0; i != limit; ++i) - writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream); - - /// Second, serialize non-empty variant (other variants are empty and we can skip their serialization). - settings.path.push_back(Substream::VariantElements); - addVariantElementToPath(settings.path, non_empty_global_discr); - /// We can use the same offset/limit as for whole Variant column - variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); - variants_statistics[variant_names[non_empty_global_discr]] += limit; - settings.path.pop_back(); - settings.path.pop_back(); - return; - } - /// In general case we should iterate through local discriminators in range [offset, offset + limit] to serialize global discriminators and calculate offset/limit pair for each variant. 
const auto & local_discriminators = col.getLocalDiscriminators(); const auto & offsets = col.getOffsets(); std::vector> variant_offsets_and_limits(variants.size(), {0, 0}); size_t end = offset + limit; + size_t num_non_empty_variants_in_range = 0; + ColumnVariant::Discriminator last_non_empty_variant_discr = 0; for (size_t i = offset; i < end; ++i) { auto global_discr = col.globalDiscriminatorByLocal(local_discriminators[i]); - writeBinaryLittleEndian(global_discr, *discriminators_stream); - if (global_discr != ColumnVariant::NULL_DISCRIMINATOR) { /// If we see this discriminator for the first time, update offset @@ -233,9 +339,38 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian variant_offsets_and_limits[global_discr].first = offsets[i]; /// Update limit for this discriminator. ++variant_offsets_and_limits[global_discr].second; + ++num_non_empty_variants_in_range; + last_non_empty_variant_discr = global_discr; } } + /// In basic mode just serialize discriminators as is row by row. + if (variant_state->discriminators_mode.value == DiscriminatorsSerializationMode::BASIC) + { + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// In compact mode check if we have the same discriminator for all rows in this granule. + /// First, check if all values in granule are NULLs. + else if (num_non_empty_variants_in_range == 0) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(ColumnVariant::NULL_DISCRIMINATOR, *discriminators_stream); + } + /// Then, check if there is only 1 variant and no NULLs in this granule. 
+ else if (num_non_empty_variants_in_range == 1 && variant_offsets_and_limits[last_non_empty_variant_discr].second == limit) + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::COMPACT), *discriminators_stream); + writeBinaryLittleEndian(last_non_empty_variant_discr, *discriminators_stream); + } + /// Otherwise there are different discriminators in this granule. + else + { + writeBinaryLittleEndian(UInt8(CompactDiscriminatorsGranuleFormat::PLAIN), *discriminators_stream); + for (size_t i = offset; i < end; ++i) + writeBinaryLittleEndian(col.globalDiscriminatorByLocal(local_discriminators[i]), *discriminators_stream); + } + /// Serialize variants in global order. settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) @@ -249,7 +384,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVarian variant_offsets_and_limits[i].first, variant_offsets_and_limits[i].second, settings, - variant_state->states[i]); + variant_state->variant_states[i]); variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second; settings.path.pop_back(); } @@ -284,39 +419,68 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( /// First, deserialize discriminators. 
settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariant * variant_state = nullptr; + std::vector variant_limits; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_state = checkAndGetState(state); col.getLocalDiscriminatorsPtr() = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState(variant_state->discriminators_state); + + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limits = deserializeCompactDiscriminators(col.getLocalDiscriminatorsPtr(), limit, discriminators_stream, settings.continuous_reading, *discriminators_state); - SerializationNumber().deserializeBinaryBulk(*col.getLocalDiscriminatorsPtr()->assumeMutable(), *discriminators_stream, limit, 0); addToSubstreamsCache(cache, settings.path, col.getLocalDiscriminatorsPtr()); } + /// It may happen that there is no such stream, in this case just do nothing. + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Second, calculate limits for each variant by iterating through new discriminators. - std::vector variant_limits(variants.size(), 0); - auto & discriminators_data = col.getLocalDiscriminators(); - size_t discriminators_offset = discriminators_data.size() - limit; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + /// Second, calculate limits for each variant by iterating through new discriminators + /// if we didn't do it during discriminators deserialization. 
+ if (variant_limits.empty()) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr != ColumnVariant::NULL_DISCRIMINATOR) - ++variant_limits[discr]; + variant_limits.resize(variants.size(), 0); + auto & discriminators_data = col.getLocalDiscriminators(); + + /// We can actually read less than limit discriminators and we cannot determine the actual number of read rows + /// by discriminators column as it could be taken from the substreams cache. And we need actual number of read + /// rows to fill offsets correctly later if they are not in the cache. We can determine if offsets column is in cache + /// or not by comparing it with discriminators column size (they should be the same when offsets are in cache). + /// If offsets are not in the cache, we can use it's size to determine the actual number of read rows. + size_t num_new_discriminators = limit; + size_t offsets_size = col.getOffsetsPtr()->size(); + if (discriminators_data.size() > offsets_size) + num_new_discriminators = discriminators_data.size() - offsets_size; + size_t discriminators_offset = discriminators_data.size() - num_new_discriminators; + + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } } /// Now we can deserialize variants according to their limits. 
- auto * variant_state = checkAndGetState(state); settings.path.push_back(Substream::VariantElements); for (size_t i = 0; i != variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->states[i], cache); + variants[i]->deserializeBinaryBulkWithMultipleStreams(col.getVariantPtrByLocalDiscriminator(i), variant_limits[i], settings, variant_state->variant_states[i], cache); settings.path.pop_back(); } settings.path.pop_back(); @@ -336,20 +500,49 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( } else { - auto & offsets = col.getOffsets(); - offsets.reserve(offsets.size() + limit); std::vector variant_offsets; variant_offsets.reserve(variants.size()); + size_t num_non_empty_variants = 0; + ColumnVariant::Discriminator last_non_empty_discr = 0; for (size_t i = 0; i != variants.size(); ++i) - variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); - - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) { - ColumnVariant::Discriminator discr = discriminators_data[i]; - if (discr == ColumnVariant::NULL_DISCRIMINATOR) - offsets.emplace_back(); - else - offsets.push_back(variant_offsets[discr]++); + if (variant_limits[i]) + { + ++num_non_empty_variants; + last_non_empty_discr = i; + } + + variant_offsets.push_back(col.getVariantByLocalDiscriminator(i).size() - variant_limits[i]); + } + + auto & discriminators_data = col.getLocalDiscriminators(); + auto & offsets = col.getOffsets(); + size_t num_new_offsets = discriminators_data.size() - offsets.size(); + offsets.reserve(offsets.size() + num_new_offsets); + /// If there are only NULLs were read, fill offsets with 0. 
+ if (num_non_empty_variants == 0) + { + offsets.resize_fill(discriminators_data.size(), 0); + } + /// If there is only 1 variant and no NULLs was read, fill offsets with sequential offsets of this variant. + else if (num_non_empty_variants == 1 && variant_limits[last_non_empty_discr] == num_new_offsets) + { + size_t first_offset = col.getVariantByLocalDiscriminator(last_non_empty_discr).size() - num_new_offsets; + for (size_t i = 0; i != num_new_offsets; ++i) + offsets.push_back(first_offset + i); + } + /// Otherwise iterate through discriminators and fill offsets accordingly. + else + { + size_t start = offsets.size(); + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr == ColumnVariant::NULL_DISCRIMINATOR) + offsets.emplace_back(); + else + offsets.push_back(variant_offsets[discr]++); + } } addToSubstreamsCache(cache, settings.path, col.getOffsetsPtr()); @@ -357,6 +550,72 @@ void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( settings.path.pop_back(); } +std::vector SerializationVariant::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStateVariantDiscriminators & state) const +{ + auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + state.remaining_rows_in_granule = 0; + + /// Calculate limits for variants during discriminators deserialization. + std::vector variant_limits(variants.size(), 0); + while (limit) + { + /// If we read all rows from current granule, start reading the next one. 
+ if (state.remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limits; + + readDiscriminatorsGranuleStart(state, stream); + } + + size_t limit_in_granule = std::min(limit, state.remaining_rows_in_granule); + if (state.granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, state.compact_discr); + if (state.compact_discr != ColumnVariant::NULL_DISCRIMINATOR) + variant_limits[state.compact_discr] += limit_in_granule; + } + else + { + SerializationNumber().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + { + ColumnVariant::Discriminator discr = discriminators_data[i]; + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + ++variant_limits[discr]; + } + } + + state.remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limits; +} + +void SerializationVariant::readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, DB::ReadBuffer * stream) +{ + UInt64 granule_size; + readVarUInt(granule_size, *stream); + state.remaining_rows_in_granule = granule_size; + UInt8 granule_format; + readBinaryLittleEndian(granule_format, *stream); + state.granule_format = static_cast(granule_format); + if (granule_format == CompactDiscriminatorsGranuleFormat::COMPACT) + readBinaryLittleEndian(state.compact_discr, *stream); +} + void SerializationVariant::addVariantElementToPath(DB::ISerialization::SubstreamPath & path, size_t i) const { path.push_back(Substream::VariantElement); diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index b6aa1534538..af89632cf81 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ 
-2,10 +2,18 @@ #include #include +#include namespace DB { + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + + /// Class for serializing/deserializing column with Variant type. /// It supports both text and binary bulk serializations/deserializations. /// @@ -18,6 +26,17 @@ namespace DB /// /// During binary bulk serialization it transforms local discriminators /// to global and serializes them into a separate stream VariantDiscriminators. +/// There are 2 modes of serialising discriminators: +/// Basic mode, when all discriminators are serialized as is row by row. +/// Compact mode, when we avoid writing the same discriminators in granules when there is +/// only one variant (or only NULLs) in the granule. +/// In compact mode we serialize granules in the following format: +/// +/// There are 2 different formats of granule - plain and compact. +/// Plain format is used when there are different discriminators in this granule, +/// in this format all discriminators are serialized as is row by row. +/// Compact format is used when all discriminators are the same in this granule, +/// in this case only this single discriminator is serialized. /// Each variant is serialized into a separate stream with path VariantElements/VariantElement /// (VariantElements stream is needed for correct sub-columns creation). We store and serialize /// variants in a sparse form (the size of a variant column equals to the number of its discriminator @@ -32,6 +51,25 @@ namespace DB class SerializationVariant : public ISerialization { public: + struct DiscriminatorsSerializationMode + { + enum Value + { + BASIC = 0, /// Store the whole discriminators column. + COMPACT = 1, /// Don't write discriminators in granule if all of them are the same. 
+ }; + + static void checkMode(UInt64 mode) + { + if (mode > Value::COMPACT) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationVariant discriminators column."); + } + + explicit DiscriminatorsSerializationMode(UInt64 mode) : value(static_cast(mode)) { checkMode(mode); } + + Value value; + }; + using VariantSerializations = std::vector; explicit SerializationVariant( @@ -123,8 +161,44 @@ public: static std::vector getVariantsDeserializeTextOrder(const DataTypes & variant_types); private: + friend SerializationVariantElement; + friend SerializationVariantElementNullMap; + void addVariantElementToPath(SubstreamPath & path, size_t i) const; + enum CompactDiscriminatorsGranuleFormat + { + PLAIN = 0, /// Granule has different discriminators and they are serialized as is row by row. + COMPACT = 1, /// Granule has single discriminator for all rows and it is serialized as single value. + }; + + struct DeserializeBinaryBulkStateVariantDiscriminators : public ISerialization::DeserializeBinaryBulkState + { + explicit DeserializeBinaryBulkStateVariantDiscriminators(UInt64 mode_) : mode(mode_) + { + } + + DiscriminatorsSerializationMode mode; + + /// Deserialize state of currently read granule in compact mode. 
+ CompactDiscriminatorsGranuleFormat granule_format = CompactDiscriminatorsGranuleFormat::PLAIN; + size_t remaining_rows_in_granule = 0; + ColumnVariant::Discriminator compact_discr = 0; + }; + + static DeserializeBinaryBulkStatePtr deserializeDiscriminatorsStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache); + + std::vector deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStateVariantDiscriminators & state) const; + + static void readDiscriminatorsGranuleStart(DeserializeBinaryBulkStateVariantDiscriminators & state, ReadBuffer * stream); + bool tryDeserializeTextEscapedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeTextQuotedImpl(IColumn & column, const String & field, const FormatSettings & settings) const; bool tryDeserializeWholeTextImpl(IColumn & column, const String & field, const FormatSettings & settings) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index ec0b4019c2f..8ceab17cba4 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -12,7 +13,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState +struct SerializationVariantElement::DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState { /// During deserialization discriminators and variant streams can be shared. 
/// For example we can read several variant elements together: "select v.UInt32, v.String from table", @@ -24,7 +25,7 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria /// substream cache correctly. ColumnPtr discriminators; ColumnPtr variant; - + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; @@ -65,7 +66,12 @@ void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinary void SerializationVariantElement::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + auto variant_element_state = std::make_shared(); + variant_element_state->discriminators_state = discriminators_state; addVariantToPath(settings.path); nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache); @@ -86,35 +92,61 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const { - auto * variant_element_state = checkAndGetState(state); - /// First, deserialize discriminators from Variant column. 
settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElement * variant_element_state = nullptr; + std::optional variant_limit; if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) { + variant_element_state = checkAndGetState(state); variant_element_state->discriminators = cached_discriminators; } - else + else if (auto * discriminators_stream = settings.getter(settings.path)) { - auto * discriminators_stream = settings.getter(settings.path); - if (!discriminators_stream) - return; + variant_element_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState(variant_element_state->discriminators_state); /// If we started to read a new column, reinitialize discriminators column in deserialization state. if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); - SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limit = deserializeCompactDiscriminators( + variant_element_state->discriminators, + variant_discriminator, + limit, + discriminators_stream, + settings.continuous_reading, + variant_element_state->discriminators_state, + this); + addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } + else + { + settings.path.pop_back(); + return; + } + settings.path.pop_back(); - /// Iterate through new discriminators to calculate the limit for our variant. 
+ /// We could read less than limit discriminators, but we will need actual number of read rows later. + size_t num_new_discriminators = variant_element_state->discriminators->size() - result_column->size(); + + /// Iterate through new discriminators to calculate the limit for our variant + /// if we didn't do it during discriminators deserialization. const auto & discriminators_data = assert_cast(*variant_element_state->discriminators).getData(); - size_t discriminators_offset = variant_element_state->discriminators->size() - limit; - size_t variant_limit = 0; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) - variant_limit += (discriminators_data[i] == variant_discriminator); + size_t discriminators_offset = variant_element_state->discriminators->size() - num_new_discriminators; + if (!variant_limit) + { + variant_limit = 0; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + *variant_limit += (discriminators_data[i] == variant_discriminator); + } /// Now we know the limit for our variant and can deserialize it. @@ -125,19 +157,19 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( auto & nullable_column = assert_cast(*mutable_column); NullMap & null_map = nullable_column.getNullMapData(); /// If we have only our discriminator in range, fill null map with 0. - if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - null_map.resize_fill(null_map.size() + limit, 0); + null_map.resize_fill(null_map.size() + num_new_discriminators, 0); } /// If no our discriminator in current range, fill null map with 1. else if (variant_limit == 0) { - null_map.resize_fill(null_map.size() + limit, 1); + null_map.resize_fill(null_map.size() + num_new_discriminators, 1); } /// Otherwise we should iterate through discriminators to fill null map. 
else { - null_map.reserve(null_map.size() + limit); + null_map.reserve(null_map.size() + num_new_discriminators); for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) null_map.push_back(discriminators_data[i] != variant_discriminator); } @@ -159,12 +191,12 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// If nothing to deserialize, just insert defaults. if (variant_limit == 0) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } addVariantToPath(settings.path); - nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache); + nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, *variant_limit, settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); /// If nothing was deserialized when variant_limit > 0 @@ -173,16 +205,16 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( /// In this case we should just insert default values. if (variant_element_state->variant->empty()) { - mutable_column->insertManyDefaults(limit); + mutable_column->insertManyDefaults(num_new_discriminators); return; } - size_t variant_offset = variant_element_state->variant->size() - variant_limit; + size_t variant_offset = variant_element_state->variant->size() - *variant_limit; /// If we have only our discriminator in range, insert the whole range to result column. 
- if (variant_limit == limit) + if (variant_limit == num_new_discriminators) { - mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit); + mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, *variant_limit); } /// Otherwise iterate through discriminators and insert value from variant or default value depending on the discriminator. else @@ -197,6 +229,59 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( } } +size_t SerializationVariantElement::deserializeCompactDiscriminators( + DB::ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, + size_t limit, + DB::ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStatePtr & discriminators_state_, + const ISerialization * serialization) +{ + auto * discriminators_state = checkAndGetState(discriminators_state_, serialization); + auto & discriminators = assert_cast(*discriminators_column->assumeMutable()); + auto & discriminators_data = discriminators.getData(); + + /// Reset state if we are reading from the start of the granule and not from the previous position in the file. + if (!continuous_reading) + discriminators_state->remaining_rows_in_granule = 0; + + /// Calculate our variant limit during discriminators deserialization. + size_t variant_limit = 0; + while (limit) + { + /// If we read all rows from current granule, start reading the next one. 
+ if (discriminators_state->remaining_rows_in_granule == 0) + { + if (stream->eof()) + return variant_limit; + + SerializationVariant::readDiscriminatorsGranuleStart(*discriminators_state, stream); + } + + size_t limit_in_granule = std::min(limit, discriminators_state->remaining_rows_in_granule); + if (discriminators_state->granule_format == SerializationVariant::CompactDiscriminatorsGranuleFormat::COMPACT) + { + auto & data = discriminators.getData(); + data.resize_fill(data.size() + limit_in_granule, discriminators_state->compact_discr); + if (discriminators_state->compact_discr == variant_discriminator) + variant_limit += limit_in_granule; + } + else + { + SerializationNumber().deserializeBinaryBulk(discriminators, *stream, limit_in_granule, 0); + size_t start = discriminators_data.size() - limit_in_granule; + for (size_t i = start; i != discriminators_data.size(); ++i) + variant_limit += (discriminators_data[i] == variant_discriminator); + } + + discriminators_state->remaining_rows_in_granule -= limit_in_granule; + limit -= limit_in_granule; + } + + return variant_limit; +} + void SerializationVariantElement::addVariantToPath(DB::ISerialization::SubstreamPath & path) const { path.push_back(Substream::VariantElements); @@ -214,17 +299,19 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_) + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_) : local_discriminators(local_discriminators_) , variant_element_name(variant_element_name_) , global_variant_discriminator(global_variant_discriminator_) , local_variant_discriminator(local_variant_discriminator_) + , make_nullable(make_nullable_) { } DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const { - return 
makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; } SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const @@ -237,12 +324,12 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Case when original Variant column contained only one non-empty variant and no NULLs. /// In this case just use this variant. if (prev->size() == local_discriminators->size()) - return makeNullableOrLowCardinalityNullableSafe(prev); + return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; /// If this variant is empty, fill result column with default values. if (prev->empty()) { - auto res = makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty(); + auto res = make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev)->cloneEmpty() : prev->cloneEmpty(); res->insertManyDefaults(local_discriminators->size()); return res; } @@ -257,16 +344,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: /// Now we can create new column from null-map and variant column using IColumn::expand. auto res_column = IColumn::mutate(prev); - /// Special case for LowCardinality. We want the result to be LowCardinality(Nullable), + /// Special case for LowCardinality when we want the result to be LowCardinality(Nullable), /// but we don't have a good way to apply null-mask for LowCardinality(), so, we first /// convert our column to LowCardinality(Nullable()) and then use expand which will /// fill rows with 0 in mask with default value (that is NULL). 
- if (prev->lowCardinality()) + if (make_nullable && prev->lowCardinality()) res_column = assert_cast(*res_column).cloneNullable(); res_column->expand(null_map, /*inverted = */ true); - if (res_column->canBeInsideNullable()) + if (make_nullable && prev->canBeInsideNullable()) { auto null_map_col = ColumnUInt8::create(); null_map_col->getData() = std::move(null_map); diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index 0ce0a72e250..69101aea0f5 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -9,6 +9,7 @@ namespace DB { class SerializationVariant; +class SerializationVariantElementNullMap; /// Serialization for Variant element when we read it as a subcolumn. class SerializationVariantElement final : public SerializationWrapper @@ -66,12 +67,14 @@ public: const String variant_element_name; const ColumnVariant::Discriminator global_variant_discriminator; const ColumnVariant::Discriminator local_variant_discriminator; + bool make_nullable; VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, - ColumnVariant::Discriminator local_variant_discriminator_); + ColumnVariant::Discriminator local_variant_discriminator_, + bool make_nullable_); DataTypePtr create(const DataTypePtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; @@ -79,6 +82,18 @@ public: }; private: friend SerializationVariant; + friend SerializationVariantElementNullMap; + + struct DeserializeBinaryBulkStateVariantElement; + + static size_t deserializeCompactDiscriminators( + ColumnPtr & discriminators_column, + ColumnVariant::Discriminator variant_discriminator, + size_t limit, + ReadBuffer * stream, + bool continuous_reading, + DeserializeBinaryBulkStatePtr & discriminators_state_, + const 
ISerialization * serialization); void addVariantToPath(SubstreamPath & path) const; void removeVariantFromPath(SubstreamPath & path) const; diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp new file mode 100644 index 00000000000..f30da4fecf9 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.cpp @@ -0,0 +1,190 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +struct DeserializeBinaryBulkStateVariantElementNullMap : public ISerialization::DeserializeBinaryBulkState +{ + /// During deserialization discriminators streams can be shared. + /// For example we can read several variant elements together: "select v.UInt32, v.String.null from table", + /// or we can read the whole variant and some of variant elements or their subcolumns: "select v, v.UInt32.null from table". + /// To read the same column from the same stream more than once we use substream cache, + /// but this cache stores the whole column, not only the current range. + /// During deserialization of variant elements or their subcolumns discriminators column is not stored + /// in the result column, so we need to store them inside deserialization state, so we can use + /// substream cache correctly. + ColumnPtr discriminators; + ISerialization::DeserializeBinaryBulkStatePtr discriminators_state; +}; + +void SerializationVariantElementNullMap::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData &) const +{ + /// We will need stream for discriminators during deserialization. 
+ settings.path.push_back(Substream::VariantDiscriminators); + callback(settings.path); + settings.path.pop_back(); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStatePrefix( + const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const +{ + DeserializeBinaryBulkStatePtr discriminators_state = SerializationVariant::deserializeDiscriminatorsStatePrefix(settings, cache); + if (!discriminators_state) + return; + + auto variant_element_null_map_state = std::make_shared(); + variant_element_null_map_state->discriminators_state = std::move(discriminators_state); + state = std::move(variant_element_null_map_state); +} + +void SerializationVariantElementNullMap::serializeBinaryBulkWithMultipleStreams( + const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationVariantElementNullMap"); +} + +void SerializationVariantElementNullMap::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & result_column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + /// Deserialize discriminators from Variant column. 
+ settings.path.push_back(Substream::VariantDiscriminators); + + DeserializeBinaryBulkStateVariantElementNullMap * variant_element_null_map_state = nullptr; + std::optional variant_limit; + if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + variant_element_null_map_state->discriminators = cached_discriminators; + } + else if (auto * discriminators_stream = settings.getter(settings.path)) + { + variant_element_null_map_state = checkAndGetState(state); + auto * discriminators_state = checkAndGetState( + variant_element_null_map_state->discriminators_state); + + /// If we started to read a new column, reinitialize discriminators column in deserialization state. + if (!variant_element_null_map_state->discriminators || result_column->empty()) + variant_element_null_map_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); + + /// Deserialize discriminators according to serialization mode. + if (discriminators_state->mode.value == SerializationVariant::DiscriminatorsSerializationMode::BASIC) + SerializationNumber().deserializeBinaryBulk( + *variant_element_null_map_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); + else + variant_limit = SerializationVariantElement::deserializeCompactDiscriminators( + variant_element_null_map_state->discriminators, + variant_discriminator, + limit, + discriminators_stream, + settings.continuous_reading, + variant_element_null_map_state->discriminators_state, + this); + + addToSubstreamsCache(cache, settings.path, variant_element_null_map_state->discriminators); + } + else + { + /// There is no such stream or cached data, it means that there is no Variant column in this part (it could happen after alter table add column). + /// In such cases columns are filled with default values, but for null-map column default value should be 1, not 0. Fill column with 1 here instead. 
+ MutableColumnPtr mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + data.resize_fill(data.size() + limit, 1); + settings.path.pop_back(); + return; + } + settings.path.pop_back(); + + MutableColumnPtr mutable_column = result_column->assumeMutable(); + auto & data = assert_cast(*mutable_column).getData(); + /// Check if there are no such variant in read range. + if (variant_limit && *variant_limit == 0) + { + data.resize_fill(data.size() + limit, 1); + } + /// Check if there is only our variant in read range. + else if (variant_limit && *variant_limit == limit) + { + data.resize_fill(data.size() + limit, 0); + } + /// Iterate through new discriminators to calculate the null map of our variant. + else + { + const auto & discriminators_data + = assert_cast(*variant_element_null_map_state->discriminators).getData(); + size_t discriminators_offset = variant_element_null_map_state->discriminators->size() - limit; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + data.push_back(discriminators_data[i] != variant_discriminator); + } +} + +SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::VariantNullMapSubcolumnCreator( + const ColumnPtr & local_discriminators_, + const String & variant_element_name_, + ColumnVariant::Discriminator global_variant_discriminator_, + ColumnVariant::Discriminator local_variant_discriminator_) + : local_discriminators(local_discriminators_) + , variant_element_name(variant_element_name_) + , global_variant_discriminator(global_variant_discriminator_) + , local_variant_discriminator(local_variant_discriminator_) +{ +} + +DataTypePtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::DataTypePtr &) const +{ + return std::make_shared(); +} + +SerializationPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::SerializationPtr &) const +{ + return 
std::make_shared(variant_element_name, global_variant_discriminator); +} + +ColumnPtr SerializationVariantElementNullMap::VariantNullMapSubcolumnCreator::create(const DB::ColumnPtr &) const +{ + /// Iterate through discriminators and create null-map for our variant. + auto null_map_col = ColumnUInt8::create(); + auto & null_map_data = null_map_col->getData(); + null_map_data.reserve(local_discriminators->size()); + const auto & local_discriminators_data = assert_cast(*local_discriminators).getData(); + for (auto local_discr : local_discriminators_data) + null_map_data.push_back(local_discr != local_variant_discriminator); + + return null_map_col; +} + + +} diff --git a/src/DataTypes/Serializations/SerializationVariantElementNullMap.h b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h new file mode 100644 index 00000000000..cd81b445189 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationVariantElementNullMap.h @@ -0,0 +1,107 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class SerializationVariant; +class SerializationVariantElement; + +/// Serialization for Variant element null map when we read it as a subcolumn. +/// For example, variant.UInt64.null. +/// It requires separate serialization because there is no actual Nullable column +/// and we should construct null map from variant discriminators. +/// The implementation of deserializeBinaryBulk* methods is similar to SerializationVariantElement, +/// but differs in that there is no need to read the actual data of the variant, only discriminators. 
+class SerializationVariantElementNullMap final : public SimpleTextSerialization +{ +public: + SerializationVariantElementNullMap(const String & variant_element_name_, ColumnVariant::Discriminator variant_discriminator_) + : variant_element_name(variant_element_name_), variant_discriminator(variant_discriminator_) + { + } + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + 
void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + bool tryDeserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } + + struct VariantNullMapSubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr local_discriminators; + const String variant_element_name; + const ColumnVariant::Discriminator global_variant_discriminator; + const ColumnVariant::Discriminator local_variant_discriminator; + + VariantNullMapSubcolumnCreator( + const ColumnPtr & local_discriminators_, + const String & variant_element_name_, + ColumnVariant::Discriminator global_variant_discriminator_, + ColumnVariant::Discriminator local_variant_discriminator_); + + DataTypePtr create(const DataTypePtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + SerializationPtr create(const SerializationPtr & prev) const override; + }; +private: + [[noreturn]] static void throwNoSerialization() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for variant element null map subcolumn"); + } + + friend SerializationVariant; + friend SerializationVariantElement; + + /// To be able to deserialize Variant element null map as a subcolumn + /// we need variant element type name and global discriminator. + String variant_element_name; + ColumnVariant::Discriminator variant_discriminator; + +}; + +} diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 7ab4235feeb..cb9fc76b1a8 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -716,6 +716,16 @@ static void writeFieldsToColumn( null_map_column->insertValue(0); } + else + { + // Column is not null but field is null. 
It's possible due to overrides + if (field.isNull()) + { + column_to.insertDefault(); + return false; + } + } + return true; }; @@ -791,7 +801,7 @@ static void writeFieldsToColumn( if (write_data_to_null_map(value, index)) { - const String & data = value.get(); + const String & data = value.safeGet(); casted_string_column->insertData(data.data(), data.size()); } } diff --git a/src/Disks/DiskFactory.cpp b/src/Disks/DiskFactory.cpp index de7ee5a74f4..4aa7f6ff564 100644 --- a/src/Disks/DiskFactory.cpp +++ b/src/Disks/DiskFactory.cpp @@ -27,7 +27,8 @@ DiskPtr DiskFactory::create( ContextPtr context, const DisksMap & map, bool attach, - bool custom_disk) const + bool custom_disk, + const std::unordered_set & skip_types) const { const auto disk_type = config.getString(config_prefix + ".type", "local"); @@ -38,6 +39,11 @@ DiskPtr DiskFactory::create( "DiskFactory: the disk '{}' has unknown disk type: {}", name, disk_type); } + if (skip_types.contains(found->first)) + { + return nullptr; + } + const auto & disk_creator = found->second; return disk_creator(name, config, config_prefix, context, map, attach, custom_disk); } diff --git a/src/Disks/DiskFactory.h b/src/Disks/DiskFactory.h index d03ffa6a40f..044ce81dbae 100644 --- a/src/Disks/DiskFactory.h +++ b/src/Disks/DiskFactory.h @@ -42,7 +42,8 @@ public: ContextPtr context, const DisksMap & map, bool attach = false, - bool custom_disk = false) const; + bool custom_disk = false, + const std::unordered_set & skip_types = {}) const; private: using DiskTypeRegistry = std::unordered_map; diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index a9260a249dd..f45d12618bf 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -7,7 +7,6 @@ #include #include -#include namespace DB { @@ -27,7 +26,8 @@ void DiskSelector::assertInitialized() const } -void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator 
disk_validator) +void DiskSelector::initialize( + const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, DiskValidator disk_validator) { Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix, keys); @@ -36,6 +36,8 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, constexpr auto default_disk_name = "default"; bool has_default_disk = false; + constexpr auto local_disk_name = "local"; + bool has_local_disk = false; for (const auto & disk_name : keys) { if (!std::all_of(disk_name.begin(), disk_name.end(), isWordCharASCII)) @@ -44,21 +46,31 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, if (disk_name == default_disk_name) has_default_disk = true; + if (disk_name == local_disk_name) + has_local_disk = true; + const auto disk_config_prefix = config_prefix + "." + disk_name; if (disk_validator && !disk_validator(config, disk_config_prefix, disk_name)) continue; - - disks.emplace(disk_name, factory.create(disk_name, config, disk_config_prefix, context, disks)); + auto created_disk + = factory.create(disk_name, config, disk_config_prefix, context, disks, /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk.get()) + { + disks.emplace(disk_name, std::move(created_disk)); + } } if (!has_default_disk) { disks.emplace( - default_disk_name, - std::make_shared( - default_disk_name, context->getPath(), 0, context, config, config_prefix)); + default_disk_name, std::make_shared(default_disk_name, context->getPath(), 0, context, config, config_prefix)); } + if (!has_local_disk && (context->getApplicationType() == Context::ApplicationType::DISKS)) + { + throw_away_local_on_update = true; + disks.emplace(local_disk_name, std::make_shared(local_disk_name, "/", 0, context, config, config_prefix)); + } is_initialized = true; } @@ -76,6 +88,7 @@ DiskSelectorPtr DiskSelector::updateFromConfig( std::shared_ptr result = std::make_shared(*this); 
constexpr auto default_disk_name = "default"; + constexpr auto local_disk_name = "local"; DisksMap old_disks_minus_new_disks(result->getDisksMap()); for (const auto & disk_name : keys) @@ -86,7 +99,12 @@ DiskSelectorPtr DiskSelector::updateFromConfig( auto disk_config_prefix = config_prefix + "." + disk_name; if (!result->getDisksMap().contains(disk_name)) { - result->addToDiskMap(disk_name, factory.create(disk_name, config, disk_config_prefix, context, result->getDisksMap())); + auto created_disk = factory.create( + disk_name, config, disk_config_prefix, context, result->getDisksMap(), /*attach*/ false, /*custom_disk*/ false, skip_types); + if (created_disk) + { + result->addToDiskMap(disk_name, created_disk); + } } else { @@ -99,6 +117,10 @@ DiskSelectorPtr DiskSelector::updateFromConfig( } old_disks_minus_new_disks.erase(default_disk_name); + if (throw_away_local_on_update) + { + old_disks_minus_new_disks.erase(local_disk_name); + } if (!old_disks_minus_new_disks.empty()) { diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 6669b428158..49a1be5cf50 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -20,7 +20,7 @@ class DiskSelector public: static constexpr auto TMP_INTERNAL_DISK_PREFIX = "__tmp_internal_"; - DiskSelector() = default; + explicit DiskSelector(std::unordered_set skip_types_ = {}) : skip_types(skip_types_) { } DiskSelector(const DiskSelector & from) = default; using DiskValidator = std::function; @@ -48,6 +48,10 @@ private: bool is_initialized = false; void assertInitialized() const; + + const std::unordered_set skip_types; + + bool throw_away_local_on_update = false; }; } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 79c2e6b4890..6de106893f3 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -243,6 +243,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.orc.output_row_index_stride = 
settings.output_format_orc_row_index_stride; format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder; format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down; + format_settings.orc.read_use_writer_time_zone = settings.input_format_orc_read_use_writer_time_zone; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8ac783a1d86..446dc17a187 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -403,6 +403,7 @@ struct FormatSettings bool use_fast_decoder = true; bool filter_push_down = true; UInt64 output_row_index_stride = 10'000; + bool read_use_writer_time_zone = false; } orc{}; /// For capnProto format we should determine how to diff --git a/src/Functions/FunctionsBinaryRepresentation.cpp b/src/Functions/FunctionsBinaryRepresentation.cpp index 0f3f8be96a7..ab10d402df4 100644 --- a/src/Functions/FunctionsBinaryRepresentation.cpp +++ b/src/Functions/FunctionsBinaryRepresentation.cpp @@ -3,14 +3,14 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include +#include namespace DB { @@ -218,10 +218,7 @@ struct UnbinImpl static constexpr auto name = "unbin"; static constexpr size_t word_size = 8; - static void decode(const char * pos, const char * end, char *& out) - { - binStringDecode(pos, end, out); - } + static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out, word_size); } }; /// Encode number or string to string with binary or hexadecimal representation @@ -651,7 +648,15 @@ public: size_t size = 
in_offsets.size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + + size_t max_out_len = 0; + for (size_t i = 0; i < in_offsets.size(); ++i) + { + const size_t len = in_offsets[i] - (i == 0 ? 0 : in_offsets[i - 1]) + - /* trailing zero symbol that is always added in ColumnString and that is ignored while decoding */ 1; + max_out_len += (len + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1; + } + out_vec.resize(max_out_len); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -661,6 +666,7 @@ public: { size_t new_offset = in_offsets[i]; + /// `new_offset - 1` because in ColumnString each string is stored with trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); out_offsets[i] = pos - begin; @@ -668,6 +674,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; @@ -680,11 +689,11 @@ public: ColumnString::Offsets & out_offsets = col_res->getOffsets(); const ColumnString::Chars & in_vec = col_fix_string->getChars(); - size_t n = col_fix_string->getN(); + const size_t n = col_fix_string->getN(); size_t size = col_fix_string->size(); out_offsets.resize(size); - out_vec.resize(in_vec.size() / word_size + size); + out_vec.resize(((n + word_size - 1) / word_size + /* trailing zero symbol that is always added by Impl::decode */ 1) * size); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; @@ -694,6 +703,7 @@ public: { size_t new_offset = prev_offset + n; + /// here we don't subtract 1 from `new_offset` because in ColumnFixedString strings are stored without trailing zero byte Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset]), pos); 
out_offsets[i] = pos - begin; @@ -701,6 +711,9 @@ public: prev_offset = new_offset; } + chassert( + static_cast(pos - begin) <= out_vec.size(), + fmt::format("too small amount of memory was preallocated: needed {}, but have only {}", pos - begin, out_vec.size())); out_vec.resize(pos - begin); return col_res; diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 24a40c45c6e..f5fb08f71d2 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -1,9 +1,12 @@ #pragma once -#include #include +#include #include +#include +#include #include +#include namespace DB { @@ -48,45 +51,75 @@ struct ReplaceRegexpImpl static constexpr int max_captures = 10; - static Instructions createInstructions(std::string_view replacement, int num_captures) + /// The replacement string references must not contain non-existing capturing groups. + static void checkSubstitutions(std::string_view replacement, int num_captures) { - Instructions instructions; - - String literals; for (size_t i = 0; i < replacement.size(); ++i) { if (replacement[i] == '\\' && i + 1 < replacement.size()) { - if (isNumericASCII(replacement[i + 1])) /// Substitution + if (isNumericASCII(replacement[i + 1])) /// substitution + { + int substitution_num = replacement[i + 1] - '0'; + if (substitution_num >= num_captures) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Substitution '\\{}' in replacement argument is invalid, regexp has only {} capturing groups", substitution_num, num_captures - 1); + } + } + } + } + + static Instructions createInstructions(std::string_view replacement, int num_captures) + { + checkSubstitutions(replacement, num_captures); + + Instructions instructions; + + String literals; + literals.reserve(replacement.size()); + + for (size_t i = 0; i < replacement.size(); ++i) + { + if (replacement[i] == '\\' && i + 1 < replacement.size()) + { + if (isNumericASCII(replacement[i + 1])) /// substitution { if (!literals.empty()) { 
instructions.emplace_back(literals); literals = ""; } - instructions.emplace_back(replacement[i + 1] - '0'); + int substitution_num = replacement[i + 1] - '0'; + instructions.emplace_back(substitution_num); } else - literals += replacement[i + 1]; /// Escaping + literals += replacement[i + 1]; /// escaping ++i; } else - literals += replacement[i]; /// Plain character + literals += replacement[i]; /// plain character } if (!literals.empty()) instructions.emplace_back(literals); - for (const auto & instr : instructions) - if (instr.substitution_num >= num_captures) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Id {} in replacement string is an invalid substitution, regexp has only {} capturing groups", - instr.substitution_num, num_captures - 1); - return instructions; } + static bool canFallbackToStringReplacement(const String & needle, const String & replacement, const re2::RE2 & searcher, int num_captures) + { + if (searcher.NumberOfCapturingGroups()) + return false; + + checkSubstitutions(replacement, num_captures); + + String required_substring; + bool is_trivial; + bool required_substring_is_prefix; + std::vector alternatives; + OptimizedRegularExpression::analyze(needle, required_substring, is_trivial, required_substring_is_prefix, alternatives); + return is_trivial && required_substring_is_prefix && required_substring == needle; + } + static void processString( const char * haystack_data, size_t haystack_length, @@ -124,7 +157,7 @@ struct ReplaceRegexpImpl { std::string_view replacement; if (instr.substitution_num >= 0) - replacement = std::string_view(matches[instr.substitution_num].data(), matches[instr.substitution_num].size()); + replacement = {matches[instr.substitution_num].data(), matches[instr.substitution_num].size()}; else replacement = instr.literal; res_data.resize(res_data.size() + replacement.size()); @@ -179,19 +212,32 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error 
messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr re2::RE2 searcher(needle, regexp_options); - if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); + /// Try to use non-regexp string replacement. This shortcut is implemented only for const-needles + const-replacement as + /// pattern analysis incurs some cost too. + if (canFallbackToStringReplacement(needle, replacement, searcher, num_captures)) + { + auto convertTrait = [](ReplaceRegexpTraits::Replace first_or_all) + { + switch (first_or_all) + { + case ReplaceRegexpTraits::Replace::First: return ReplaceStringTraits::Replace::First; + case ReplaceRegexpTraits::Replace::All: return ReplaceStringTraits::Replace::All; + } + }; + ReplaceStringImpl::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets); + return; + } + Instructions instructions = createInstructions(replacement, num_captures); - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -221,10 +267,8 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t hs_from = i > 0 ? 
haystack_offsets[i - 1] : 0; @@ -242,6 +286,7 @@ struct ReplaceRegexpImpl re2::RE2 searcher(needle, regexp_options); if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); + int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); Instructions instructions = createInstructions(replacement, num_captures); @@ -270,17 +315,14 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. - regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr re2::RE2 searcher(needle, regexp_options); - if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -290,8 +332,9 @@ struct ReplaceRegexpImpl size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); + std::string_view replacement(repl_data, repl_length); - Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures); + Instructions instructions = createInstructions(replacement, num_captures); processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions); res_offsets[i] = res_offset; @@ -317,10 +360,8 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. 
- regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr - /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < haystack_size; ++i) { size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; @@ -338,12 +379,14 @@ struct ReplaceRegexpImpl size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); + std::string_view replacement(repl_data, repl_length); re2::RE2 searcher(needle, regexp_options); if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); + int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures); + Instructions instructions = createInstructions(replacement, num_captures); processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions); res_offsets[i] = res_offset; @@ -367,16 +410,13 @@ struct ReplaceRegexpImpl res_offsets.resize(haystack_size); re2::RE2::Options regexp_options; - /// Don't write error messages to stderr. 
- regexp_options.set_log_errors(false); + regexp_options.set_log_errors(false); /// don't write error messages to stderr re2::RE2 searcher(needle, regexp_options); - if (!searcher.ok()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error()); int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures); - Instructions instructions = createInstructions(replacement, num_captures); for (size_t i = 0; i < haystack_size; ++i) diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index fa9b3dc92dd..0782f109187 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -13,6 +14,8 @@ #include #include #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include #include #include @@ -28,6 +31,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; } using NullMap = PaddedPODArray; @@ -424,31 +428,21 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override { - if constexpr (std::is_same_v) + if (auto res = executeMap(arguments, result_type)) + return res; + + if (auto res = executeArrayLowCardinality(arguments)) + return res; + + auto new_arguments = arguments; + + for (auto & argument : new_arguments) { - if (isMap(arguments[0].type)) - { - auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); - - const auto & map_column = assert_cast(*non_const_map_column); - const auto & map_array_column = map_column.getNestedColumn(); - auto offsets = map_array_column.getOffsetsPtr(); - auto keys = map_column.getNestedData().getColumnPtr(0); - auto array_column = ColumnArray::create(keys, offsets); - - const auto & type_map = assert_cast(*arguments[0].type); - auto 
array_type = std::make_shared(type_map.getKeyType()); - - auto arguments_copy = arguments; - arguments_copy[0].column = std::move(array_column); - arguments_copy[0].type = std::move(array_type); - arguments_copy[0].name = arguments[0].name; - - return executeArrayImpl(arguments_copy, result_type); - } + argument.column = recursiveRemoveLowCardinality(argument.column); + argument.type = recursiveRemoveLowCardinality(argument.type); } - return executeArrayImpl(arguments, result_type); + return executeArrayImpl(new_arguments, result_type); } private: @@ -458,18 +452,6 @@ private: using NullMaps = std::pair; - struct ExecutionData - { - const IColumn& left; - const IColumn& right; - const ColumnArray::Offsets& offsets; - ColumnPtr result_column; - NullMaps maps; - ResultColumnPtr result { ResultColumnType::create() }; - - void moveResult() { result_column = std::move(result); } - }; - static bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) { auto inner_type_decayed = removeNullable(removeLowCardinality(inner_type)); @@ -574,23 +556,13 @@ private: } } -#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 +#define INTEGRAL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 ColumnPtr executeOnNonNullable(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const { - if (const auto* const left_arr = checkAndGetColumn(arguments[0].column.get())) - { - if (checkAndGetColumn(&left_arr->getData())) - { - if (auto res = executeLowCardinality(arguments)) - return res; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal internal type of first argument of function {}", getName()); - } - } - ColumnPtr res; - if (!((res = executeIntegral(arguments)) + if (!((res = executeNothing(arguments)) + || (res = executeIntegral(arguments)) || (res = executeConst(arguments, result_type)) || (res = executeString(arguments)) || (res = executeGeneric(arguments)))) @@ 
-599,6 +571,8 @@ private: return res; } +#undef INTEGRAL_PACK + /** * The Array's internal data type may be quite tricky (containing a Nullable type somewhere). To process the * Nullable types correctly, for each data type specialisation we provide two null maps (one for the data and one @@ -627,6 +601,14 @@ private: return {null_map_data, null_map_item}; } + struct ExecutionData + { + const IColumn & left; + const IColumn & right; + const ColumnArray::Offsets & offsets; + NullMaps null_maps; + }; + /** * Given a variadic pack #Integral, apply executeIntegralExpanded with such parameters: * Integral s = {s1, s2, ...} @@ -635,39 +617,33 @@ private: template static ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const left = checkAndGetColumn(arguments[0].column.get()); - - if (!left) + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) return nullptr; - const ColumnPtr right_converted_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn& right = *right_converted_ptr.get(); - - ExecutionData data = { - left->getData(), - right, - left->getOffsets(), - nullptr, - getNullMaps(arguments) + ExecutionData data + { + .left = array->getData(), + .right = *arguments[1].column, + .offsets = array->getOffsets(), + .null_maps = getNullMaps(arguments), }; - if (executeIntegral(data)) - return data.result_column; - - return nullptr; + auto result = ResultColumnType::create(); + return executeIntegral(data, *result) ? std::move(result) : nullptr; } template - static bool executeIntegral(ExecutionData& data) + static bool executeIntegral(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralExpanded(data) || ...); + return (executeIntegralExpanded(data, result) || ...); } /// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ... 
template - static bool executeIntegralExpanded(ExecutionData& data) + static bool executeIntegralExpanded(const ExecutionData & data, ResultColumnType & result) { - return (executeIntegralImpl(data) || ...); + return (executeIntegralImpl(data, result) || ...); } /** @@ -676,40 +652,31 @@ private: * so we have to check all possible variants for #Initial and #Resulting types. */ template - static bool executeIntegralImpl(ExecutionData& data) + static bool executeIntegralImpl(const ExecutionData & data, ResultColumnType & result) { - const ColumnVector * col_nested = checkAndGetColumn>(&data.left); - - if (!col_nested) + const auto * left_typed = checkAndGetColumn>(&data.left); + if (!left_typed) return false; - const auto [null_map_data, null_map_item] = data.maps; - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto item_arg_const = checkAndGetColumnConst>(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConst>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_const->template getValue(), - data.result->getData(), - null_map_data, + result.getData(), + data.null_maps.first, nullptr); - else if (const auto item_arg_vector = checkAndGetColumn>(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn>(&data.right)) Impl::Main::vector( - col_nested->getData(), + left_typed->getData(), data.offsets, item_arg_vector->getData(), - data.result->getData(), - null_map_data, - null_map_item); + result.getData(), + data.null_maps.first, + data.null_maps.second); else return false; - data.moveResult(); return true; } @@ -724,227 +691,161 @@ private: * * Tips and tricks tried can be found at https://github.com/ClickHouse/ClickHouse/pull/12550 . 
*/ - static ColumnPtr executeLowCardinality(const ColumnsWithTypeAndName & arguments) + static ColumnPtr executeArrayLowCardinality(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * const col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + const auto * col_array_const = checkAndGetColumnConstData(arguments[0].column.get()); - if (!col_array) + if (!col_array && !col_array_const) return nullptr; - const ColumnLowCardinality * const col_lc = checkAndGetColumn(&col_array->getData()); + if (col_array_const) + col_array = col_array_const; - if (!col_lc) + const auto * left_lc = checkAndGetColumn(&col_array->getData()); + if (!left_lc) return nullptr; - const auto [null_map_data, null_map_item] = getNullMaps(arguments); + const auto * right_const = checkAndGetColumn(arguments[1].column.get()); + if (!right_const) + return nullptr; - if (const ColumnConst * col_arg_const = checkAndGetColumn(&*arguments[1].column)) + const auto & array_type = assert_cast(*arguments[0].type); + const auto target_type = recursiveRemoveLowCardinality(array_type.getNestedType()); + auto right = recursiveRemoveLowCardinality(right_const->getDataColumnPtr()); + + UInt64 index = 0; + UInt64 left_size = arguments[0].column->size(); + ResultColumnPtr col_result = ResultColumnType::create(); + + if (!right->isNullAt(0)) { - const IColumnUnique & col_lc_dict = col_lc->getDictionary(); + auto right_type = recursiveRemoveLowCardinality(arguments[1].type); + right = castColumn({right, right_type, ""}, target_type); - const DataTypeArray * const array_type = checkAndGetDataType(arguments[0].type.get()); - const DataTypePtr target_type_ptr = recursiveRemoveLowCardinality(array_type->getNestedType()); + if (right->isNullable()) + right = checkAndGetColumn(*right).getNestedColumnPtr(); - ColumnPtr col_arg_cloned = castColumn( - {col_arg_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}, 
target_type_ptr); + StringRef elem = right->getDataAt(0); + const auto & left_dict = left_lc->getDictionary(); - ResultColumnPtr col_result = ResultColumnType::create(); - UInt64 index = 0; - - if (!col_arg_cloned->isNullAt(0)) + if (std::optional maybe_index = left_dict.getOrFindValueIndex(elem); maybe_index) { - if (col_arg_cloned->isNullable()) - col_arg_cloned = checkAndGetColumn(*col_arg_cloned).getNestedColumnPtr(); - - StringRef elem = col_arg_cloned->getDataAt(0); - - if (std::optional maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index) - { - index = *maybe_index; - } - else - { - const size_t offsets_size = col_array->getOffsets().size(); - auto & data = col_result->getData(); - - data.resize_fill(offsets_size); - - return col_result; - } + index = *maybe_index; } - - Impl::Main::vector( - col_lc->getIndexes(), - col_array->getOffsets(), - index, /** Assuming LowCardinality has index of NULL always as zero. */ - col_result->getData(), - null_map_data, - null_map_item); - - return col_result; - } - else if (col_lc->nestedIsNullable()) // LowCardinality(Nullable(T)) and U - { - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); // Nullable(T) - const ColumnNullable & left_nullable = checkAndGetColumn(*left_casted); - - const NullMap * const null_map_left_casted = &left_nullable.getNullMapColumn().getData(); - - const IColumn & left_ptr = left_nullable.getNestedColumn(); - - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - const ColumnNullable * const right_nullable = checkAndGetColumn(right_casted.get()); - - const NullMap * const null_map_right_casted = right_nullable - ? &right_nullable->getNullMapColumn().getData() - : null_map_item; - - const IColumn& right_ptr = right_nullable - ? 
right_nullable->getNestedColumn() - : *right_casted.get(); - - ExecutionData data = + else { - left_ptr, right_ptr, - col_array->getOffsets(), - nullptr, - {null_map_left_casted, null_map_right_casted}}; + col_result->getData().resize_fill(col_array->size()); - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; + if (col_array_const) + return ColumnConst::create(std::move(col_result), left_size); + + return col_result; + } } - else // LowCardinality(T) and U, T not Nullable - { - if (arguments[1].column->isNullable()) - return nullptr; - - if (const auto* const arg_lc = checkAndGetColumn(arguments[1].column.get()); - arg_lc && arg_lc->isNullable()) - return nullptr; - - // LowCardinality(T) and U (possibly LowCardinality(V)) - - const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality(); - const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality(); - - ExecutionData data = - { - *left_casted.get(), *right_casted.get(), col_array->getOffsets(), - nullptr, {null_map_data, null_map_item} - }; - - if (dispatchConvertedLowCardinalityColumns(data)) - return data.result_column; - } - - return nullptr; - } - - static bool dispatchConvertedLowCardinalityColumns(ExecutionData & data) - { - if (data.left.isNumeric() && data.right.isNumeric()) // ColumnArrays - return executeIntegral(data); - - if (checkAndGetColumn(&data.left)) - return executeStringImpl(data); Impl::Main::vector( - data.left, - data.offsets, data.right, - data.result->getData(), - data.maps.first, data.maps.second); + left_lc->getIndexes(), + col_array->getOffsets(), + index, /** Assuming LowCardinality has index of NULL always as zero. 
*/ + col_result->getData(), + nullptr, + nullptr); - data.moveResult(); - return true; + if (col_array_const) + return ColumnConst::create(std::move(col_result), left_size); + + return col_result; } -#undef INTEGRAL_TPL_PACK + ColumnPtr executeMap(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const + { + if constexpr (!std::is_same_v) + return nullptr; + + if (!isMap(arguments[0].type)) + return nullptr; + + auto non_const_map_column = arguments[0].column->convertToFullColumnIfConst(); + + const auto & map_column = assert_cast(*non_const_map_column); + const auto & map_array_column = map_column.getNestedColumn(); + auto offsets = map_array_column.getOffsetsPtr(); + auto keys = map_column.getNestedData().getColumnPtr(0); + auto array_column = ColumnArray::create(keys, offsets); + + const auto & type_map = assert_cast(*arguments[0].type); + auto array_type = std::make_shared(type_map.getKeyType()); + + auto arguments_copy = arguments; + arguments_copy[0].column = std::move(array_column); + arguments_copy[0].type = std::move(array_type); + arguments_copy[0].name = arguments[0].name; + + return executeArrayImpl(arguments_copy, result_type); + } static ColumnPtr executeString(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); - + const auto * array = checkAndGetColumn(arguments[0].column.get()); if (!array) return nullptr; - const ColumnString * left = checkAndGetColumn(&array->getData()); - + const auto * left = checkAndGetColumn(&array->getData()); if (!left) return nullptr; - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn & right = *right_ptr.get(); + const auto & right = *arguments[1].column; + const auto [null_map_data, null_map_item] = getNullMaps(arguments); - ExecutionData data = { - *left, right, array->getOffsets(), - nullptr, getNullMaps(arguments), - std::move(ResultColumnType::create()) - }; + auto result 
= ResultColumnType::create(); - if (executeStringImpl(data)) - return data.result_column; - - return nullptr; - } - - static bool executeStringImpl(ExecutionData& data) - { - const auto [null_map_data, null_map_item] = data.maps; - const ColumnString& left = *typeid_cast(&data.left); - - if (data.right.onlyNull()) - Impl::Null::process( - data.offsets, - data.result->getData(), - null_map_data); - else if (const auto *const item_arg_const = checkAndGetColumnConstStringOrFixedString(&data.right)) + if (const auto * item_arg_const = checkAndGetColumnConstStringOrFixedString(&right)) { - const ColumnString * item_const_string = - checkAndGetColumn(&item_arg_const->getDataColumn()); - - const ColumnFixedString * item_const_fixedstring = - checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_string = checkAndGetColumn(&item_arg_const->getDataColumn()); + const auto * item_const_fixedstring = checkAndGetColumn(&item_arg_const->getDataColumn()); if (item_const_string) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_string->getChars(), item_const_string->getDataAt(0).size, - data.result->getData(), + result->getData(), null_map_data, null_map_item); else if (item_const_fixedstring) Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_const_fixedstring->getChars(), item_const_fixedstring->getN(), - data.result->getData(), + result->getData(), null_map_data, null_map_item); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Logical error: ColumnConst contains not String nor FixedString column"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnConst contains not String nor FixedString column"); } - else if (const auto *const item_arg_vector = checkAndGetColumn(&data.right)) + else if (const auto * item_arg_vector = checkAndGetColumn(&right)) { 
Impl::String::process( - left.getChars(), - data.offsets, - left.getOffsets(), + left->getChars(), + array->getOffsets(), + left->getOffsets(), item_arg_vector->getChars(), item_arg_vector->getOffsets(), - data.result->getData(), + result->getData(), null_map_data, null_map_item); } else - return false; + { + return nullptr; + } - data.moveResult(); - return true; + return result; } static ColumnPtr executeConst(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) @@ -955,9 +856,7 @@ private: return nullptr; Array arr = col_array->getValue(); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn * item_arg = right_ptr.get(); + const IColumn * item_arg = arguments[1].column.get(); if (isColumnConst(*item_arg)) { @@ -1026,48 +925,59 @@ private: } } + static ColumnPtr executeNothing(const ColumnsWithTypeAndName & arguments) + { + const auto * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) + return nullptr; + + if (arguments[1].column->onlyNull()) + { + auto result = ResultColumnType::create(); + Impl::Null::process(array->getOffsets(), result->getData(), getNullMaps(arguments).first); + return result; + } + + return nullptr; + } + static ColumnPtr executeGeneric(const ColumnsWithTypeAndName & arguments) { - const ColumnArray * col = checkAndGetColumn(arguments[0].column.get()); - - if (!col) + const auto * col_array = checkAndGetColumn(arguments[0].column.get()); + if (!col_array) return nullptr; DataTypePtr array_elements_type = assert_cast(*arguments[0].type).getNestedType(); const DataTypePtr & index_type = arguments[1].type; - DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, index_type}); - - ColumnPtr col_nested = castColumn({ col->getDataPtr(), array_elements_type, "" }, common_type); - - const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - ColumnPtr item_arg = castColumn({ right_ptr, 
removeLowCardinality(index_type), "" }, common_type); + DataTypePtr common_type = getLeastSupertype(DataTypes{array_elements_type, arguments[1].type}); + ColumnPtr col_nested = castColumn({ col_array->getDataPtr(), array_elements_type, "" }, common_type); + ColumnPtr item_arg = castColumn({ arguments[1].column, removeLowCardinality(index_type), "" }, common_type); auto col_res = ResultColumnType::create(); auto [null_map_data, null_map_item] = getNullMaps(arguments); - if (item_arg->onlyNull()) - Impl::Null::process( - col->getOffsets(), - col_res->getData(), - null_map_data); - else if (isColumnConst(*item_arg)) + if (const auto * item_arg_const = checkAndGetColumn(item_arg.get())) + { Impl::Main::vector( *col_nested, - col->getOffsets(), - typeid_cast(*item_arg).getDataColumn(), + col_array->getOffsets(), + item_arg_const->getDataColumn(), col_res->getData(), /// TODO This is wrong. null_map_data, nullptr); + } else + { Impl::Main::vector( *col_nested, - col->getOffsets(), + col_array->getOffsets(), *item_arg, col_res->getData(), null_map_data, null_map_item); + } return col_res; } diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index c83195bc976..71f50abba6c 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -5,7 +5,17 @@ namespace DB REGISTER_FUNCTION(Tuple) { - factory.registerFunction(); + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns a tuple by grouping input arguments. + +For columns C1, C2, ... with the types T1, T2, ..., it returns a named Tuple(C1 T1, C2 T2, ...) type tuple containing these columns if their names are unique and can be treated as unquoted identifiers, otherwise a Tuple(T1, T2, ...) is returned. There is no cost to execute the function. +Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. + +The function implements the operator `(x, y, ...)`. 
+)", + .examples{{"typical", "SELECT tuple(1, 2)", "(1,2)"}}, + .categories{"Miscellaneous"}}); } } diff --git a/src/Functions/tuple.h b/src/Functions/tuple.h index 8b3e041f781..94529d86861 100644 --- a/src/Functions/tuple.h +++ b/src/Functions/tuple.h @@ -6,20 +6,28 @@ #include #include #include +#include namespace DB { -/** tuple(x, y, ...) is a function that allows you to group several columns +/** tuple(x, y, ...) is a function that allows you to group several columns. * tupleElement(tuple, n) is a function that allows you to retrieve a column from tuple. */ class FunctionTuple : public IFunction { + bool enable_named_columns; + public: static constexpr auto name = "tuple"; /// maybe_unused: false-positive - [[ maybe_unused ]] static FunctionPtr create(ContextPtr) { return std::make_shared(); } + [[maybe_unused]] static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context->getSettingsRef().enable_named_columns_in_function_tuple); + } + + explicit FunctionTuple(bool enable_named_columns_ = false) : enable_named_columns(enable_named_columns_) { } String getName() const override { return name; } @@ -38,9 +46,26 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - return std::make_shared(arguments); + if (arguments.empty()) + return std::make_shared(DataTypes{}); + + DataTypes types; + Names names; + NameSet name_set; + for (const auto & argument : arguments) + { + types.emplace_back(argument.type); + names.emplace_back(argument.name); + name_set.emplace(argument.name); + } + + if (enable_named_columns && name_set.size() == names.size() + && std::all_of(names.cbegin(), names.cend(), [](const auto & n) { return isUnquotedIdentifier(n); })) + return 
std::make_shared(types, names); + else + return std::make_shared(types); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override @@ -53,9 +78,9 @@ public: for (size_t i = 0; i < tuple_size; ++i) { /** If tuple is mixed of constant and not constant columns, - * convert all to non-constant columns, - * because many places in code expect all non-constant columns in non-constant tuple. - */ + * convert all to non-constant columns, + * because many places in code expect all non-constant columns in non-constant tuple. + */ tuple_columns[i] = arguments[i].column->convertToFullColumnIfConst(); } return ColumnTuple::create(tuple_columns); diff --git a/src/Functions/tupleNames.cpp b/src/Functions/tupleNames.cpp new file mode 100644 index 00000000000..e444478c224 --- /dev/null +++ b/src/Functions/tupleNames.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/** Transform a named tuple into names, which is a constant array of strings. 
+ */ +class ExecutableFunctionTupleNames : public IExecutableFunction +{ +public: + static constexpr auto name = "tupleNames"; + + explicit ExecutableFunctionTupleNames(Array name_fields_) : name_fields(std::move(name_fields_)) { } + + String getName() const override { return name; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return result_type->createColumnConst(input_rows_count, name_fields); + } + +private: + Array name_fields; +}; + +class FunctionBaseTupleNames : public IFunctionBase +{ +public: + static constexpr auto name = "tupleNames"; + + explicit FunctionBaseTupleNames(DataTypePtr argument_type, DataTypePtr result_type_, Array name_fields_) + : argument_types({std::move(argument_type)}), result_type(std::move(result_type_)), name_fields(std::move(name_fields_)) + { + } + + String getName() const override { return name; } + + bool isSuitableForConstantFolding() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + + const DataTypePtr & getResultType() const override { return result_type; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique(name_fields); + } + +private: + DataTypes argument_types; + DataTypePtr result_type; + Array name_fields; +}; + +class TupleNamesOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "tupleNames"; + + static FunctionOverloadResolverPtr create(ContextPtr) { return std::make_unique(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const DataTypeTuple * tuple = 
checkAndGetDataType(arguments[0].type.get()); + + if (!tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a tuple", getName()); + + return std::make_shared(std::make_shared()); + } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override + { + const DataTypeTuple * tuple = checkAndGetDataType(arguments[0].type.get()); + + if (!tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a tuple", getName()); + + DataTypes types = tuple->getElements(); + Array name_fields; + for (const auto & elem_name : tuple->getElementNames()) + name_fields.emplace_back(elem_name); + + return std::make_unique(arguments[0].type, result_type, std::move(name_fields)); + } +}; + +} + +REGISTER_FUNCTION(TupleNames) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Converts a tuple into an array of column names. For a tuple in the form `Tuple(a T, b T, ...)`, it returns an array of strings representing the named columns of the tuple. If the tuple elements do not have explicit names, their indices will be used as the column names instead. 
+)", + .examples{{"typical", "SELECT tupleNames(tuple(1 as a, 2 as b))", "['a','b']"}}, + .categories{"Miscellaneous"}}); +} + +} diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 0ec733f7840..da38bccdea1 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -44,7 +44,7 @@ struct Memory : boost::noncopyable, Allocator char * m_data = nullptr; size_t alignment = 0; - [[maybe_unused]] bool allow_gwp_asan_force_sample; + [[maybe_unused]] bool allow_gwp_asan_force_sample{false}; Memory() = default; diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 303ffb744b5..b753e66da48 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -713,8 +713,12 @@ ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::getFileInfo() /// fall back to slow whole-file reads when HEAD is actually supported; that sounds /// like a nightmare to debug.) if (e.getHTTPStatus() >= 400 && e.getHTTPStatus() <= 499 && - e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS) + e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS && + e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_REQUEST_TIMEOUT && + e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_MISDIRECTED_REQUEST) + { return HTTPFileInfo{}; + } throw; } diff --git a/src/IO/examples/CMakeLists.txt b/src/IO/examples/CMakeLists.txt index 12b85c483a1..fc9d9c7dcd1 100644 --- a/src/IO/examples/CMakeLists.txt +++ b/src/IO/examples/CMakeLists.txt @@ -59,10 +59,10 @@ clickhouse_add_executable (parse_date_time_best_effort parse_date_time_best_effo target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io) clickhouse_add_executable (zlib_ng_bug zlib_ng_bug.cpp) -target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib) +target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib clickhouse_common_io) clickhouse_add_executable (dragonbox_test dragonbox_test.cpp) 
-target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars) +target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars clickhouse_common_io) clickhouse_add_executable (zstd_buffers zstd_buffers.cpp) target_link_libraries (zstd_buffers PRIVATE clickhouse_common_io) diff --git a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp index 24467923542..99a7a73d46c 100644 --- a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp @@ -29,33 +29,12 @@ BlockIO InterpreterSetRoleQuery::execute() void InterpreterSetRoleQuery::setRole(const ASTSetRoleQuery & query) { - auto & access_control = getContext()->getAccessControl(); auto session_context = getContext()->getSessionContext(); - auto user = session_context->getUser(); if (query.kind == ASTSetRoleQuery::Kind::SET_ROLE_DEFAULT) - { session_context->setCurrentRolesDefault(); - } else - { - RolesOrUsersSet roles_from_query{*query.roles, access_control}; - std::vector new_current_roles; - if (roles_from_query.all) - { - new_current_roles = user->granted_roles.findGranted(roles_from_query); - } - else - { - for (const auto & id : roles_from_query.getMatchingIDs()) - { - if (!user->granted_roles.isGranted(id)) - throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role should be granted to set current"); - new_current_roles.emplace_back(id); - } - } - session_context->setCurrentRoles(new_current_roles); - } + session_context->setCurrentRoles(RolesOrUsersSet{*query.roles, session_context->getAccessControl()}); } diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index dd1166a9228..56055e7044c 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,13 +301,7 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); 
insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter( - query, - query_context, - query_context->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_insert */ false); + InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -732,7 +726,10 @@ try /// Access rights must be checked for the user who executed the initial INSERT query. if (key.user_id) - insert_context->setUser(*key.user_id, key.current_roles); + { + insert_context->setUser(*key.user_id); + insert_context->setCurrentRoles(key.current_roles); + } insert_context->setSettings(key.settings); @@ -787,12 +784,7 @@ try try { interpreter = std::make_unique( - key.query, - insert_context, - key.settings.insert_allow_materialized_columns, - false, - false, - true); + key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -1011,7 +1003,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } @@ -1063,7 +1055,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.getChunkInfos().add(std::move(chunk_info)); + chunk.setChunkInfo(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 91c0c592f28..5d56ef09127 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -8,23 
+8,28 @@ #include #include #include +#include #include -#include -#include #include +#include +#include +#include +#include +#include #include +#include #include #include -#include #include +#include #include +#include #include #include #include -#include +#include #include -#include -#include +#include namespace DB { @@ -33,7 +38,6 @@ namespace ErrorCodes { extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int LOGICAL_ERROR; - extern const int CLUSTER_DOESNT_EXIST; extern const int UNEXPECTED_CLUSTER; } @@ -172,7 +176,7 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, /// in case of parallel replicas custom key use round robing load balancing /// so custom key partitions will be spread over nodes in round-robin fashion - if (context->canUseParallelReplicasCustomKey(cluster) && !settings.load_balancing.changed) + if (context->canUseParallelReplicasCustomKeyForCluster(cluster) && !settings.load_balancing.changed) { new_settings.load_balancing = LoadBalancing::ROUND_ROBIN; } @@ -180,6 +184,10 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); new_context->setClientInfo(new_client_info); + + if (context->canUseParallelReplicasCustomKeyForCluster(cluster)) + new_context->disableOffsetParallelReplicas(); + return new_context; } @@ -220,6 +228,35 @@ static ThrottlerPtr getThrottler(const ContextPtr & context) return throttler; } +AdditionalShardFilterGenerator +getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns) +{ + if (!context->canUseParallelReplicasCustomKeyForCluster(cluster)) + return {}; + + const auto & settings = context->getSettingsRef(); + auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context); + if (custom_key_ast == nullptr) + return {}; + + return [my_custom_key_ast = 
std::move(custom_key_ast), + column_description = columns, + custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, + custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, + custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, + query_context = context, + replica_count = cluster.getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr + { + return getCustomKeyFilterForParallelReplica( + replica_count, + replica_num - 1, + my_custom_key_ast, + {custom_key_type, custom_key_range_lower, custom_key_range_upper}, + column_description, + query_context); + }; +} + void executeQuery( QueryPlan & query_plan, @@ -412,14 +449,7 @@ void executeQueryWithParallelReplicas( const auto & settings = context->getSettingsRef(); /// check cluster for parallel replicas - if (settings.cluster_for_parallel_replicas.value.empty()) - { - throw Exception( - ErrorCodes::CLUSTER_DOESNT_EXIST, - "Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set " - "'cluster_for_parallel_replicas' setting"); - } - auto not_optimized_cluster = context->getCluster(settings.cluster_for_parallel_replicas); + auto not_optimized_cluster = context->getClusterForParallelReplicas(); auto new_context = Context::createCopy(context); @@ -542,6 +572,84 @@ void executeQueryWithParallelReplicas( executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits); } +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const Block & header, + ContextPtr context) +{ + /// Return directly (with correct header) if no shard to query. 
+ if (query_info.getCluster()->getShardsInfo().empty()) + { + if (context->getSettingsRef().allow_experimental_analyzer) + return; + + Pipe pipe(std::make_shared(header)); + auto read_from_pipe = std::make_unique(std::move(pipe)); + read_from_pipe->setStepDescription("Read from NullSource (Distributed)"); + query_plan.addStep(std::move(read_from_pipe)); + return; + } + + ColumnsDescriptionByShardNum columns_object; + if (hasDynamicSubcolumns(columns)) + columns_object = getExtendedObjectsOfRemoteTables(*query_info.cluster, storage_id, columns, context); + + ClusterProxy::SelectStreamFactory select_stream_factory + = ClusterProxy::SelectStreamFactory(header, columns_object, snapshot, processed_stage); + + auto shard_filter_generator = getShardFilterGeneratorForCustomKey(*query_info.getCluster(), context, columns); + + ClusterProxy::executeQuery( + query_plan, + header, + processed_stage, + storage_id, + /*table_func_ptr=*/nullptr, + select_stream_factory, + getLogger("executeQueryWithParallelReplicasCustomKey"), + context, + query_info, + /*sharding_key_expr=*/nullptr, + /*sharding_key_column_name=*/{}, + /*distributed_settings=*/{}, + shard_filter_generator, + /*is_remote_function=*/false); +} + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + ContextPtr context) +{ + auto header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_tree, context, SelectQueryOptions(processed_stage).analyze()); + executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context); +} + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + SelectQueryInfo query_info, + const ColumnsDescription & columns, + 
const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context) +{ + auto header = InterpreterSelectQuery(query_ast, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + query_info.query = ClusterProxy::rewriteSelectQuery( + context, query_info.query, storage_id.getDatabaseName(), storage_id.getTableName(), /*table_function_ptr=*/nullptr); + executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context); +} } } diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 6548edf8939..c22fcd24f03 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include namespace DB @@ -13,6 +13,11 @@ class Cluster; using ClusterPtr = std::shared_ptr; struct SelectQueryInfo; +class ColumnsDescription; +struct StorageSnapshot; + +using StorageSnapshotPtr = std::shared_ptr; + class Pipe; class QueryPlan; @@ -47,6 +52,9 @@ class SelectStreamFactory; ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table); using AdditionalShardFilterGenerator = std::function; +AdditionalShardFilterGenerator +getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns); + /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. /// `stream_factory` object encapsulates the logic of creating plans for a different type of query /// (currently SELECT, DESCRIBE). 
@@ -91,6 +99,36 @@ void executeQueryWithParallelReplicas( const PlannerContextPtr & planner_context, ContextPtr context, std::shared_ptr storage_limits); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const Block & header, + ContextPtr context); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + const SelectQueryInfo & query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const QueryTreeNodePtr & query_tree, + ContextPtr context); + +void executeQueryWithParallelReplicasCustomKey( + QueryPlan & query_plan, + const StorageID & storage_id, + SelectQueryInfo query_info, + const ColumnsDescription & columns, + const StorageSnapshotPtr & snapshot, + QueryProcessingStage::Enum processed_stage, + const ASTPtr & query_ast, + ContextPtr context); } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b946c2cb21e..adb20f4854f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -190,6 +191,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int CLUSTER_DOESNT_EXIST; + extern const int SET_NON_GRANTED_ROLE; } #define SHUTDOWN(log, desc, ptr, method) do \ @@ -1303,7 +1305,7 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -void Context::setUser(const UUID & user_id_, const std::optional> & current_roles_) +void Context::setUser(const UUID & user_id_) { /// Prepare lists of user's profiles, constraints, settings, roles. 
/// NOTE: AccessControl::read() and other AccessControl's functions may require some IO work, @@ -1312,8 +1314,8 @@ void Context::setUser(const UUID & user_id_, const std::optional(user_id_); - auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles); - auto enabled_roles = access_control.getEnabledRolesInfo(new_current_roles, {}); + auto default_roles = user->granted_roles.findGranted(user->default_roles); + auto enabled_roles = access_control.getEnabledRolesInfo(default_roles, {}); auto enabled_profiles = access_control.getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles); const auto & database = user->default_database; @@ -1327,7 +1329,7 @@ void Context::setUser(const UUID & user_id_, const std::optional Context::getUserID() const return user_id; } -void Context::setCurrentRolesWithLock(const std::vector & current_roles_, const std::lock_guard &) +void Context::setCurrentRolesWithLock(const std::vector & new_current_roles, const std::lock_guard &) { - if (current_roles_.empty()) + if (new_current_roles.empty()) current_roles = nullptr; else - current_roles = std::make_shared>(current_roles_); + current_roles = std::make_shared>(new_current_roles); need_recalculate_access = true; } -void Context::setCurrentRoles(const std::vector & current_roles_) +void Context::setCurrentRolesImpl(const std::vector & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr & user) { - std::lock_guard lock(mutex); - setCurrentRolesWithLock(current_roles_, lock); + if (skip_if_not_granted) + { + auto filtered_role_ids = user->granted_roles.findGranted(new_current_roles); + std::lock_guard lock{mutex}; + setCurrentRolesWithLock(filtered_role_ids, lock); + return; + } + if (throw_if_not_granted) + { + for (const auto & role_id : new_current_roles) + { + if 
(!user->granted_roles.isGranted(role_id)) + { + auto role_name = getAccessControl().tryReadName(role_id); + throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to set as a current", role_name.value_or(toString(role_id))); + } + } + } + std::lock_guard lock2{mutex}; + setCurrentRolesWithLock(new_current_roles, lock2); +} + +void Context::setCurrentRoles(const std::vector & new_current_roles, bool check_grants) +{ + setCurrentRolesImpl(new_current_roles, /* throw_if_not_granted= */ check_grants, /* skip_if_not_granted= */ !check_grants, getUser()); +} + +void Context::setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants) +{ + if (new_current_roles.all) + { + auto user = getUser(); + setCurrentRolesImpl(user->granted_roles.findGranted(new_current_roles), /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user); + } + else + { + setCurrentRoles(new_current_roles.getMatchingIDs(), check_grants); + } +} + +void Context::setCurrentRoles(const Strings & new_current_roles, bool check_grants) +{ + setCurrentRoles(getAccessControl().getIDs(new_current_roles), check_grants); } void Context::setCurrentRolesDefault() { auto user = getUser(); - setCurrentRoles(user->granted_roles.findGranted(user->default_roles)); + setCurrentRolesImpl(user->granted_roles.findGranted(user->default_roles), /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user); } std::vector Context::getCurrentRoles() const @@ -2238,6 +2281,12 @@ void Context::setSetting(std::string_view name, const Field & value) contextSanityClampSettingsWithLock(*this, settings, lock); } +void Context::setServerSetting(std::string_view name, const Field & value) +{ + std::lock_guard lock(mutex); + shared->server_settings.set(name, value); +} + void Context::applySettingChange(const SettingChange & change) { try @@ -5469,10 +5518,37 @@ bool Context::canUseParallelReplicasOnFollower() const return canUseTaskBasedParallelReplicas() && 
getClientInfo().collaborate_with_initiator; } -bool Context::canUseParallelReplicasCustomKey(const Cluster & cluster) const +bool Context::canUseParallelReplicasCustomKey() const { - return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY - && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; + return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY; +} + +bool Context::canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const +{ + return canUseParallelReplicasCustomKey() && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; +} + +bool Context::canUseOffsetParallelReplicas() const +{ + return offset_parallel_replicas_enabled && settings.max_parallel_replicas > 1 + && getParallelReplicasMode() != Context::ParallelReplicasMode::READ_TASKS; +} + +void Context::disableOffsetParallelReplicas() +{ + offset_parallel_replicas_enabled = false; +} + +ClusterPtr Context::getClusterForParallelReplicas() const +{ + /// check cluster for parallel replicas + if (settings.cluster_for_parallel_replicas.value.empty()) + throw Exception( + ErrorCodes::CLUSTER_DOESNT_EXIST, + "Reading in parallel from replicas is enabled but cluster to execute query is not provided. 
Please set " + "'cluster_for_parallel_replicas' setting"); + + return getCluster(settings.cluster_for_parallel_replicas); } void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d3f152b7a67..8c5492bcbc8 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -61,6 +61,7 @@ class AccessFlags; struct AccessRightsElement; class AccessRightsElements; enum class RowPolicyFilterType : uint8_t; +struct RolesOrUsersSet; class EmbeddedDictionaries; class ExternalDictionariesLoader; class ExternalUserDefinedExecutableFunctionsLoader; @@ -150,6 +151,8 @@ class AsyncLoader; struct TemporaryTableHolder; using TemporaryTablesMapping = std::map>; +using ClusterPtr = std::shared_ptr; + class LoadTask; using LoadTaskPtr = std::shared_ptr; using LoadTaskPtrs = std::vector; @@ -457,6 +460,11 @@ protected: /// mutation tasks of one mutation executed against different parts of the same table. PreparedSetsCachePtr prepared_sets_cache; + /// this is a mode of parallel replicas where we set parallel_replicas_count and parallel_replicas_offset + /// and generate specific filters on the replicas (e.g. when using parallel replicas with sample key) + /// if we already use a different mode of parallel replicas we want to disable this mode + bool offset_parallel_replicas_enabled = true; + public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. @@ -600,13 +608,15 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! 
- void setUser(const UUID & user_id_, const std::optional> & current_roles_ = {}); + void setUser(const UUID & user_id_); UserPtr getUser() const; std::optional getUserID() const; String getUserName() const; - void setCurrentRoles(const std::vector & current_roles_); + void setCurrentRoles(const Strings & new_current_roles, bool check_grants = true); + void setCurrentRoles(const std::vector & new_current_roles, bool check_grants = true); + void setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants = true); void setCurrentRolesDefault(); std::vector getCurrentRoles() const; std::vector getEnabledRoles() const; @@ -823,6 +833,7 @@ public: /// Set settings by name. void setSetting(std::string_view name, const String & value); void setSetting(std::string_view name, const Field & value); + void setServerSetting(std::string_view name, const Field & value); void applySettingChange(const SettingChange & change); void applySettingsChanges(const SettingsChanges & changes); @@ -1308,7 +1319,13 @@ public: bool canUseTaskBasedParallelReplicas() const; bool canUseParallelReplicasOnInitiator() const; bool canUseParallelReplicasOnFollower() const; - bool canUseParallelReplicasCustomKey(const Cluster & cluster) const; + bool canUseParallelReplicasCustomKey() const; + bool canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const; + bool canUseOffsetParallelReplicas() const; + + void disableOffsetParallelReplicas(); + + ClusterPtr getClusterForParallelReplicas() const; enum class ParallelReplicasMode : uint8_t { @@ -1333,7 +1350,7 @@ private: void setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard & lock); - void setCurrentRolesWithLock(const std::vector & current_roles_, const std::lock_guard & lock); + void setCurrentRolesWithLock(const std::vector & new_current_roles, const std::lock_guard & lock); void setSettingWithLock(std::string_view name, const String & value, const 
std::lock_guard & lock); @@ -1366,6 +1383,7 @@ private: void initGlobal(); void setUserID(const UUID & user_id_); + void setCurrentRolesImpl(const std::vector & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr & user); template void checkAccessImpl(const Args &... args) const; diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 81bb6290acb..4a84a7bf570 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,7 +2,6 @@ #include #include -#include #include @@ -23,7 +22,6 @@ #include #include -#include #include #include #include @@ -93,7 +91,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfoCloneable +class TableCheckTask : public ChunkInfo { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -112,12 +110,6 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } - TableCheckTask(const TableCheckTask & other) - : table(other.table) - , check_data_tasks(other.check_data_tasks) - , is_finished(other.is_finished.load()) - {} - std::optional checkNext() const { if (isFinished()) @@ -129,8 +121,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr tmp = check_data_tasks; - auto result = table->checkDataNext(tmp); + IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; + auto result = table->checkDataNext(check_data_tasks_); is_finished = !result.has_value(); return result; } @@ -188,7 +180,7 @@ protected: /// source should return at least one row to start pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.getChunkInfos().add(std::move(current_check_task)); + result.setChunkInfo(std::move(current_check_task)); return result; } 
@@ -288,7 +280,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = chunk.getChunkInfos().get(); + auto table_check_task = std::dynamic_pointer_cast(chunk.getChunkInfo()); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 89f503f5fb1..1c728729ee2 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1796,13 +1796,8 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery( - insert, - getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false).execute(); + return InterpreterInsertQuery(insert, getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 26b7e074fdf..7c7b4b3f95a 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -534,13 +534,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert( - ast.getExplainedQuery(), - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2cbfc55d008..f396db70d21 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ 
b/src/Interpreters/InterpreterInsertQuery.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +38,6 @@ #include #include #include -#include "base/defines.h" namespace ProfileEvents @@ -397,358 +394,28 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } -std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) -{ - chassert(presink_streams > 0); - chassert(sink_streams > 0); - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - - std::vector sink_chains; - std::vector presink_chains; - - for (size_t i = 0; i < sink_streams; ++i) - { - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - - sink_chains.emplace_back(std::move(out)); - } - - for (size_t i = 0; i < presink_streams; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - presink_chains.emplace_back(std::move(out)); - } - - return {std::move(presink_chains), std::move(sink_chains)}; -} - - -QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = 
select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - ContextPtr select_context = getContext(); - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. - */ - - Settings new_settings = select_context->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; - } - - auto context_for_trivial_select = Context::createCopy(context); - context_for_trivial_select->setSettings(new_settings); - context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - select_context = context_for_trivial_select; - } - - QueryPipelineBuilder pipeline; - - { - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, 
select_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. - if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) - && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) - && !isVariant(query_columns[col_idx].type) - && !isDynamic(query_columns[col_idx].type) - && output_columns.has(query_columns[col_idx].name)) - { - query_sample_block.setColumn( - col_idx, - ColumnWithTypeAndName( - makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), - makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), - query_columns[col_idx].name)); - } - } - } - } - - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - query_sample_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may 
not support it or may have different settings for applying Sparse serialization. - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - size_t num_select_threads = pipeline.getNumThreads(); - - pipeline.resize(1); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - if (!settings.insert_deduplication_token.value.empty()) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(settings.insert_deduplication_token.value, in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - } - - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. 
- - size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); - - size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; - - if (!settings.parallel_view_processing) - { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); - - if (table->isView() || !views.empty()) - sink_streams_size = 1; - } - - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - presink_streams_size, sink_streams_size, - table, metadata_snapshot, query_sample_block); - - pipeline.resize(presink_chains.size()); - - if (shouldAddSquashingFroStorage(table)) - { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, - table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); - }); - } - - for (auto & chain : presink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(presink_chains)); - - pipeline.resize(sink_streams_size); - - for (auto & chain : sink_chains) - pipeline.addResources(chain.detachResources()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) - { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. - if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); - } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
- pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); -} - - -QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) -{ - const Settings & settings = getContext()->getSettingsRef(); - - auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); - - Chain chain; - - { - auto [presink_chains, sink_chains] = buildPreAndSinkChains( - /* presink_streams */1, /* sink_streams */1, - table, metadata_snapshot, query_sample_block); - - chain = std::move(presink_chains.front()); - chain.appendChain(std::move(sink_chains.front())); - } - - if (!settings.insert_deduplication_token.value.empty()) - { - chain.addSource(std::make_shared(chain.getInputHeader())); - chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); - } - - chain.addSource(std::make_shared(chain.getInputHeader())); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - - auto balancing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(balancing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - QueryPipeline pipeline = QueryPipeline(std::move(chain)); - - pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); - pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); - pipeline.complete(std::move(pipe)); - } - - return pipeline; -} - - BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); + QueryPipelineBuilder pipeline; + std::optional distributed_pipeline; + QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); + StoragePtr inner_table; + if (const auto * mv = dynamic_cast(table.get())) + inner_table = mv->getTargetTable(); + if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table 
functions we check access while executing @@ -756,43 +423,320 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (!allow_materialized) + if (query.select && settings.parallel_distributed_insert_select) + // Distributed INSERT SELECT + distributed_pipeline = table->distributedWrite(query, getContext()); + + std::vector presink_chains; + std::vector sink_chains; + if (!distributed_pipeline) { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use the same streams for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + /// * If it's not an INSERT SELECT, forget all that and use one stream. 
+ size_t pre_streams_size = 1; + size_t sink_streams_size = 1; + + if (query.select) + { + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. 
+ */ + + Settings new_settings = getContext()->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + } + + auto new_context = Context::createCopy(context); + new_context->setSettings(new_settings); + new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + else + { + /// Passing 1 as subquery_depth will disable limiting size of intermediate result. 
+ auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + if (settings.max_insert_threads > 1) + { + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. + /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. + const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); + pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads + : std::min(settings.max_insert_threads, pipeline.getNumStreams()); + + /// Deduplication when passing insert_deduplication_token breaks if using more than one thread + if (!settings.insert_deduplication_token.toString().empty()) + { + LOG_DEBUG( + getLogger("InsertQuery"), + "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); + pre_streams_size = 1; + } + + if (table->supportsParallelInsert()) + sink_streams_size = pre_streams_size; + } + + pipeline.resize(pre_streams_size); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. 
+ if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); + } + } + } + } + + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + for (size_t i = 0; i < sink_streams_size; ++i) + { + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + sink_chains.emplace_back(std::move(out)); + } + for (size_t i = 0; i < pre_streams_size; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } } BlockIO res; - if (query.select) + /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? 
+ if (distributed_pipeline) { - if (settings.parallel_distributed_insert_select) + res.pipeline = std::move(*distributed_pipeline); + } + else if (query.select) + { + const auto & header = presink_chains.at(0).getInputHeader(); + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - auto distributed = table->distributedWrite(query, getContext()); - if (distributed) - { - res.pipeline = std::move(*distributed); - } - else - { - res.pipeline = buildInsertSelectPipeline(query, table); - } - } - else + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - res.pipeline = buildInsertSelectPipeline(query, table); + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + size_t threads = presink_chains.size(); + + pipeline.resize(1); + + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); + + pipeline.resize(threads); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); } + + size_t num_select_threads = pipeline.getNumThreads(); + + for (auto & chain : presink_chains) + resources = chain.detachResources(); + for (auto & chain : sink_chains) + resources = chain.detachResources(); + + pipeline.addChains(std::move(presink_chains)); + pipeline.resize(sink_chains.size()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing) + { + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
+ pipeline.setMaxThreads(settings.max_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + if (!allow_materialized) + { + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); + } + + res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - res.pipeline = buildInsertPipeline(query, table); + auto & chain = presink_chains.at(0); + chain.appendChain(std::move(sink_chains.at(0))); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(balancing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + res.pipeline = QueryPipeline(std::move(presink_chains[0])); + res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); + res.pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + res.pipeline.complete(std::move(pipe)); + } } - res.pipeline.addStorageHolder(table); + res.pipeline.addResources(std::move(resources)); - if (const auto * mv = dynamic_cast(table.get())) - res.pipeline.addStorageHolder(mv->getTargetTable()); + res.pipeline.addStorageHolder(table); + if (inner_table) + res.pipeline.addStorageHolder(inner_table); return res; } @@ -813,27 +757,17 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } - void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } - void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique( - args.query, - args.context, - args.allow_materialized, - /* no_squash */false, - /* no_destination */false, - /* async_insert */false); + return std::make_unique(args.query, 
args.context, args.allow_materialized); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } - - } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 894c7c42144..bf73fb2a319 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_, - bool no_squash_, - bool no_destination, - bool async_insert_); + bool allow_materialized_ = false, + bool no_squash_ = false, + bool no_destination_ = false, + bool async_insert_ = false); /** Prepare a request for execution. Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,17 +73,12 @@ private: ASTPtr query_ptr; const bool allow_materialized; - bool no_squash = false; - bool no_destination = false; + const bool no_squash; + const bool no_destination; const bool async_insert; std::vector> owned_buffers; - std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); - - QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); - QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); - Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 90c484636ea..fae204912fc 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -565,7 +565,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context); ASTPtr parallel_replicas_custom_filter_ast = nullptr; - if (storage && 
context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY && !joined_tables.tablesWithColumns().empty()) + if (storage && context->canUseParallelReplicasCustomKey() && !joined_tables.tablesWithColumns().empty()) { if (settings.parallel_replicas_count > 1) { @@ -586,16 +586,28 @@ InterpreterSelectQuery::InterpreterSelectQuery( else if (settings.parallel_replica_offset > 0) { throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Parallel replicas processing with custom_key has been requested " - "(setting 'max_parallel_replicas') but the table does not have custom_key defined for it " - "or it's invalid (settings `parallel_replicas_custom_key`)"); + ErrorCodes::BAD_ARGUMENTS, + "Parallel replicas processing with custom_key has been requested " + "(setting 'max_parallel_replicas') but the table does not have custom_key defined for it " + "or it's invalid (settings `parallel_replicas_custom_key`)"); } } + /// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan + /// instead of executing the query with multiple replicas + /// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a + /// local plan and remote replicas else if (auto * distributed = dynamic_cast(storage.get()); - distributed && context->canUseParallelReplicasCustomKey(*distributed->getCluster())) + distributed && context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { context->setSetting("distributed_group_by_no_merge", 2); + context->setSetting("prefer_localhost_replica", Field(0)); + } + else if ( + storage->isMergeTree() && (storage->supportsReplication() || settings.parallel_replicas_for_non_replicated_merge_tree) + && context->getClientInfo().distributed_depth == 0 + && context->canUseParallelReplicasCustomKeyForCluster(*context->getClusterForParallelReplicas())) + { + context->setSetting("prefer_localhost_replica", Field(0)); } } diff --git 
a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 25434d1103e..f8b6a6542cc 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB @@ -12,33 +11,24 @@ namespace ErrorCodes } Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) + : header(header_) + , min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) - , header(header_) { } Chunk Squashing::flush() { - if (!accumulated) - return {}; - - auto result = convertToChunk(accumulated.extract()); - chassert(result); - return result; + return convertToChunk(std::move(chunks_to_merge_vec)); } Chunk Squashing::squash(Chunk && input_chunk) { - if (!input_chunk) + if (!input_chunk.hasChunkInfo()) return Chunk(); - auto squash_info = input_chunk.getChunkInfos().extract(); - - if (!squash_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); - - return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos())); + const auto *info = getInfoFromChunk(input_chunk); + return squash(info->chunks); } Chunk Squashing::add(Chunk && input_chunk) @@ -47,37 +37,48 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk)) + if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) { /// If no accumulated data, return just read block. - if (!accumulated) + if (chunks_to_merge_vec.empty()) { - accumulated.add(std::move(input_chunk)); - return convertToChunk(accumulated.extract()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + return res_chunk; } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. 
- Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize()) + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(accumulated.extract()); - accumulated.add(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - accumulated.add(std::move(input_chunk)); + expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize()) - return convertToChunk(accumulated.extract()); - + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + { + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + changeCurrentSize(0, 0); + chunks_to_merge_vec.clear(); + return res_chunk; + } return {}; } @@ -89,15 +90,14 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - // It is imortant that chunk is not empty, it has to have columns even if they are empty - auto aggr_chunk = Chunk(header.getColumns(), 0); - aggr_chunk.getChunkInfos().add(std::move(info)); - chassert(aggr_chunk); - return aggr_chunk; + chunks.clear(); + + return Chunk(header.cloneEmptyColumns(), 0, info); } -Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoCollection 
&& infos) +Chunk Squashing::squash(std::vector & input_chunks) { + Chunk accumulated_chunk; std::vector mutable_columns = {}; size_t rows = 0; for (const Chunk & chunk : input_chunks) @@ -119,17 +119,35 @@ Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoColl for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; + mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } + accumulated_chunk.setColumns(std::move(mutable_columns), rows); + return accumulated_chunk; +} - Chunk result; - result.setColumns(std::move(mutable_columns), rows); - result.setChunkInfos(infos); - result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); +const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); - chassert(result); - return result; + if (!agg_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return agg_info; +} + +void Squashing::expandCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows += rows; + accumulated_size.bytes += bytes; +} + +void Squashing::changeCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows = rows; + accumulated_size.bytes = bytes; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const @@ -138,28 +156,4 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } - -bool Squashing::isEnoughSize() const -{ - return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); -}; - -bool Squashing::isEnoughSize(const Chunk & chunk) const -{ - return isEnoughSize(chunk.getNumRows(), chunk.bytes()); -} - -void Squashing::CurrentSize::add(Chunk && chunk) -{ - rows += chunk.getNumRows(); - bytes += chunk.bytes(); - 
chunks.push_back(std::move(chunk)); -} - -std::vector Squashing::CurrentSize::extract() -{ - auto result = std::move(chunks); - *this = {}; - return result; -} } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 64a9768a71f..d76cca60e41 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -8,18 +8,9 @@ namespace DB { -class ChunksToSquash : public ChunkInfoCloneable +struct ChunksToSquash : public ChunkInfo { -public: - ChunksToSquash() = default; - ChunksToSquash(const ChunksToSquash & other) - { - chunks.reserve(other.chunks.size()); - for (const auto & chunk: other.chunks) - chunks.push_back(chunk.clone()); - } - - std::vector chunks = {}; + mutable std::vector chunks = {}; }; /** Merging consecutive passed blocks to specified minimum size. @@ -45,35 +36,32 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); - void setHeader(Block header_) { header = std::move(header_); } - const Block & getHeader() const { return header; } - -private: - class CurrentSize + bool isDataLeft() + { + return !chunks_to_merge_vec.empty(); + } + + Block header; +private: + struct CurrentSize { - std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; - - public: - explicit operator bool () const { return !chunks.empty(); } - size_t getRows() const { return rows; } - size_t getBytes() const { return bytes; } - void add(Chunk && chunk); - std::vector extract(); }; - const size_t min_block_size_rows; - const size_t min_block_size_bytes; - Block header; + std::vector chunks_to_merge_vec = {}; + size_t min_block_size_rows; + size_t min_block_size_bytes; - CurrentSize accumulated; + CurrentSize accumulated_size; - static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); + static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - bool isEnoughSize() const; + static Chunk squash(std::vector & input_chunks); + + void expandCurrentSize(size_t rows, size_t bytes); + void 
changeCurrentSize(size_t rows, size_t bytes); bool isEnoughSize(size_t rows, size_t bytes) const; - bool isEnoughSize(const Chunk & chunk) const; Chunk convertToChunk(std::vector && chunks) const; }; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index f386e157b14..7d84efba1b5 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -356,10 +357,15 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf if (blob_storage_log) logs.emplace_back(blob_storage_log.get()); + bool should_prepare = global_context->getServerSettings().prepare_system_log_tables_on_startup; try { for (auto & log : logs) + { log->startup(); + if (should_prepare) + log->prepareTable(); + } } catch (...) { @@ -538,13 +544,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter( - query_ptr, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(query_ptr, insert_context); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 94cb8c3e2fd..0ac468b15ec 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -135,6 +135,12 @@ public: void stopFlushThread() override; + /** Creates new table if it does not exist. + * Renames old table if its structure is not suitable. + * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. 
+ */ + void prepareTable() override; + protected: LoggerPtr log; @@ -145,12 +151,6 @@ protected: StoragePtr getStorage() const; - /** Creates new table if it does not exist. - * Renames old table if its structure is not suitable. - * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. - */ - void prepareTable() override; - /// Some tables can override settings for internal queries virtual void addSettingsForQuery(ContextMutablePtr & mutable_context, IAST::QueryKind query_kind) const; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 9ca521a4ab3..6ec6a64b13d 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -233,7 +233,8 @@ void ThreadStatus::attachToGroupImpl(const ThreadGroupPtr & thread_group_) { /// Attach or init current thread to thread group and copy useful information from it thread_group = thread_group_; - thread_group->linkThread(thread_id); + if (!internal_thread) + thread_group->linkThread(thread_id); performance_counters.setParent(&thread_group->performance_counters); memory_tracker.setParent(&thread_group->memory_tracker); @@ -269,7 +270,8 @@ void ThreadStatus::detachFromGroup() /// Extract MemoryTracker out from query and user context memory_tracker.setParent(&total_memory_tracker); - thread_group->unlinkThread(); + if (!internal_thread) + thread_group->unlinkThread(); thread_group.reset(); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 6ce6f5e454e..a3c5a7ed3ed 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolumns in unknown required columns. + /// Check for dynamic subcolums in unknown required columns. 
if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index e372f036073..9c3f85128cf 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -237,6 +237,7 @@ private: Int64 timeout_seconds = 120; bool is_replicated_database = false; bool throw_on_timeout = true; + bool throw_on_timeout_only_active = false; bool only_running_hosts = false; bool timeout_exceeded = false; @@ -316,8 +317,8 @@ DDLQueryStatusSource::DDLQueryStatusSource( , log(getLogger("DDLQueryStatusSource")) { auto output_mode = context->getSettingsRef().distributed_ddl_output_mode; - throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE - || output_mode == DistributedDDLOutputMode::NONE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; + throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE; + throw_on_timeout_only_active = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE || output_mode == DistributedDDLOutputMode::NONE_ONLY_ACTIVE; if (hosts_to_wait) { @@ -451,7 +452,7 @@ Chunk DDLQueryStatusSource::generate() "({} of them are currently executing the task, {} are inactive). " "They are going to execute the query in background. 
Was waiting for {} seconds{}"; - if (throw_on_timeout) + if (throw_on_timeout || (throw_on_timeout_only_active && !stop_waiting_offline_hosts)) { if (!first_exception) first_exception = std::make_unique(Exception(ErrorCodes::TIMEOUT_EXCEEDED, diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 5997452bcf3..d4fc9a4bc4d 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1129,11 +1129,11 @@ inline static bool makeHexOrBinStringLiteral(IParser::Pos & pos, ASTPtr & node, if (hex) { - hexStringDecode(str_begin, str_end, res_pos); + hexStringDecode(str_begin, str_end, res_pos, word_size); } else { - binStringDecode(str_begin, str_end, res_pos); + binStringDecode(str_begin, str_end, res_pos, word_size); } return makeStringLiteral(pos, node, String(reinterpret_cast(res.data()), (res_pos - res_begin - 1))); diff --git a/src/Parsers/isUnquotedIdentifier.cpp b/src/Parsers/isUnquotedIdentifier.cpp new file mode 100644 index 00000000000..6f2442635ec --- /dev/null +++ b/src/Parsers/isUnquotedIdentifier.cpp @@ -0,0 +1,20 @@ +#include + +#include + +namespace DB +{ + +bool isUnquotedIdentifier(const String & name) +{ + Lexer lexer(name.data(), name.data() + name.size()); + + auto maybe_ident = lexer.nextToken(); + + if (maybe_ident.type != TokenType::BareWord) + return false; + + return lexer.nextToken().isEnd(); +} + +} diff --git a/src/Parsers/isUnquotedIdentifier.h b/src/Parsers/isUnquotedIdentifier.h new file mode 100644 index 00000000000..839e5860ad3 --- /dev/null +++ b/src/Parsers/isUnquotedIdentifier.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +bool isUnquotedIdentifier(const String & name); + +} diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index d26092d57cb..47582d2904f 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -834,7 +834,7 @@ JoinTreeQueryPlan 
buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres if (row_policy_filter_info.actions) table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions); - if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY) + if (query_context->canUseParallelReplicasCustomKey()) { if (settings.parallel_replicas_count > 1) { @@ -843,9 +843,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter"); } else if (auto * distributed = typeid_cast(storage.get()); - distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) + distributed && query_context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster())) { planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); + /// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan + /// instead of executing the query with multiple replicas + /// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a + /// local plan and remote replicas + planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); } } @@ -879,7 +884,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres }; /// query_plan can be empty if there is nothing to read - if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings) && query_context->canUseParallelReplicasOnInitiator()) + if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings)) { // (1) find read step QueryPlan::Node * node = query_plan.getRootNode(); @@ -906,54 +911,78 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } chassert(reading); - - // (2) if it's ReadFromMergeTree - run 
index analysis and check number of rows to read - if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) + if (query_context->canUseParallelReplicasCustomKey() && query_context->getClientInfo().distributed_depth == 0) { - auto result_ptr = reading->selectRangesToRead(); - - UInt64 rows_to_read = result_ptr->selected_rows; - if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read) - rows_to_read = table_expression_query_info.trivial_limit; - - if (max_block_size_limited && (max_block_size_limited < rows_to_read)) - rows_to_read = max_block_size_limited; - - const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; - LOG_TRACE( - getLogger("Planner"), - "Estimated {} rows to read. It is enough work for {} parallel replicas", - rows_to_read, - number_of_replicas_to_use); - - if (number_of_replicas_to_use <= 1) + if (auto cluster = query_context->getClusterForParallelReplicas(); + query_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) { - planner_context->getMutableQueryContext()->setSetting( - "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); - LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); - } - else if (number_of_replicas_to_use < settings.max_parallel_replicas) - { - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); - LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0}); + auto modified_query_info = select_query_info; + modified_query_info.cluster = std::move(cluster); + from_stage = QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + QueryPlan 
query_plan_parallel_replicas; + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan_parallel_replicas, + storage->getStorageID(), + modified_query_info, + storage->getInMemoryMetadataPtr()->getColumns(), + storage_snapshot, + from_stage, + table_expression_query_info.query_tree, + query_context); + query_plan = std::move(query_plan_parallel_replicas); } } - - // (3) if parallel replicas still enabled - replace reading step - if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator()) + else if (query_context->canUseParallelReplicasOnInitiator()) { - from_stage = QueryProcessingStage::WithMergeableState; - QueryPlan query_plan_parallel_replicas; - ClusterProxy::executeQueryWithParallelReplicas( - query_plan_parallel_replicas, - storage->getStorageID(), - from_stage, - table_expression_query_info.query_tree, - table_expression_query_info.planner_context, - query_context, - table_expression_query_info.storage_limits); - query_plan = std::move(query_plan_parallel_replicas); + // (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read + if (settings.parallel_replicas_min_number_of_rows_per_replica > 0) + { + auto result_ptr = reading->selectRangesToRead(); + + UInt64 rows_to_read = result_ptr->selected_rows; + if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read) + rows_to_read = table_expression_query_info.trivial_limit; + + if (max_block_size_limited && (max_block_size_limited < rows_to_read)) + rows_to_read = max_block_size_limited; + + const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica; + LOG_TRACE( + getLogger("Planner"), + "Estimated {} rows to read. 
It is enough work for {} parallel replicas", + rows_to_read, + number_of_replicas_to_use); + + if (number_of_replicas_to_use <= 1) + { + planner_context->getMutableQueryContext()->setSetting( + "allow_experimental_parallel_reading_from_replicas", Field(0)); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); + LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); + } + else if (number_of_replicas_to_use < settings.max_parallel_replicas) + { + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use); + LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use); + } + } + + // (3) if parallel replicas still enabled - replace reading step + if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator()) + { + from_stage = QueryProcessingStage::WithMergeableState; + QueryPlan query_plan_parallel_replicas; + ClusterProxy::executeQueryWithParallelReplicas( + query_plan_parallel_replicas, + storage->getStorageID(), + from_stage, + table_expression_query_info.query_tree, + table_expression_query_info.planner_context, + query_context, + table_expression_query_info.storage_limits); + query_plan = std::move(query_plan_parallel_replicas); + } } } diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 4466be5b3a7..5f6cf2f7230 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,6 +19,14 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } +Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(std::move(columns_)) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -35,11 +43,17 @@ Chunk::Chunk(MutableColumns 
columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } +Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) + : columns(unmuteColumns(std::move(columns_))) + , num_rows(num_rows_) + , chunk_info(std::move(chunk_info_)) +{ + checkNumRowsIsConsistent(); +} + Chunk Chunk::clone() const { - auto tmp = Chunk(getColumns(), getNumRows()); - tmp.setChunkInfos(chunk_infos.clone()); - return tmp; + return Chunk(getColumns(), getNumRows(), chunk_info); } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 1348966c0d3..4f753798eaa 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,9 +1,7 @@ #pragma once -#include #include - -#include +#include namespace DB { @@ -11,29 +9,11 @@ namespace DB class ChunkInfo { public: - using Ptr = std::shared_ptr; - - ChunkInfo() = default; - ChunkInfo(const ChunkInfo&) = default; - ChunkInfo(ChunkInfo&&) = default; - - virtual Ptr clone() const = 0; virtual ~ChunkInfo() = default; + ChunkInfo() = default; }; - -template -class ChunkInfoCloneable : public ChunkInfo -{ -public: - ChunkInfoCloneable() = default; - ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; - - Ptr clone() const override - { - return std::static_pointer_cast(std::make_shared(*static_cast(this))); - } -}; +using ChunkInfoPtr = std::shared_ptr; /** * Chunk is a list of columns with the same length. 
@@ -52,26 +32,26 @@ public: class Chunk { public: - using ChunkInfoCollection = CollectionOfDerivedItems; - Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_infos(std::move(other.chunk_infos)) + , chunk_info(std::move(other.chunk_info)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); + Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); + Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_infos = std::move(other.chunk_infos); + chunk_info = std::move(other.chunk_info); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -82,15 +62,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); + chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); - chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_infos.clear(); + chunk_info.reset(); } const Columns & getColumns() const { return columns; } @@ -101,9 +81,9 @@ public: /** Get empty columns with the same types as in block. 
*/ MutableColumns cloneEmptyColumns() const; - ChunkInfoCollection & getChunkInfos() { return chunk_infos; } - const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } - void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } + const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } + bool hasChunkInfo() const { return chunk_info != nullptr; } + void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -127,7 +107,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoCollection chunk_infos; + ChunkInfoPtr chunk_info; void checkNumRowsIsConsistent(); }; @@ -137,15 +117,11 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. -class AsyncInsertInfo : public ChunkInfoCloneable +class AsyncInsertInfo : public ChunkInfo { public: AsyncInsertInfo() = default; - AsyncInsertInfo(const AsyncInsertInfo & other) = default; - AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) - : offsets(offsets_) - , tokens(tokens_) - {} + explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} std::vector offsets; std::vector tokens; @@ -154,11 +130,9 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. 
-class ChunkMissingValues : public ChunkInfoCloneable +class ChunkMissingValues : public ChunkInfo { public: - ChunkMissingValues(const ChunkMissingValues & other) = default; - using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d9fab88fe1f..d27002197d2 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,10 +147,13 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 25c15d40c9a..cbf73c5cb07 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,10 +73,13 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto agg_info = chunk.getChunkInfos().get()) + if (auto chunk_info = chunk.getChunkInfo()) { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; + if (const auto * agg_info = typeid_cast(chunk_info.get())) + { + block.info.bucket_num = agg_info->bucket_num; + 
block.info.is_overflows = agg_info->is_overflows; + } } return true; diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 9d056b42101..fe82d1b1c53 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -227,7 +227,12 @@ private: return true; } - String column_name = "_dummy_" + std::to_string(replaced_literals.size()); + /// When generating placeholder names, ensure that we use names + /// requiring quotes to be valid identifiers. This prevents the + /// tuple() function from generating named tuples. Otherwise, + /// inserting named tuples with different names into another named + /// tuple will result in only default values being inserted. + String column_name = "-dummy-" + std::to_string(replaced_literals.size()); replaced_literals.emplace_back(literal, column_name, force_nullable); setDataType(replaced_literals.back()); ast = std::make_shared(column_name); diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index dcd5a531b05..c10969b02b7 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -900,6 +900,11 @@ bool NativeORCBlockInputFormat::prepareStripeReader() orc::RowReaderOptions row_reader_options; row_reader_options.includeTypes(include_indices); + if (format_settings.orc.read_use_writer_time_zone) + { + String writer_time_zone = current_stripe_info->getWriterTimezone(); + row_reader_options.setTimezoneName(writer_time_zone); + } row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength()); if (format_settings.orc.filter_push_down && sarg) { diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9e499e2c400..a5d334f4f1d 
100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -179,9 +179,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count); - piece.back().setChunkInfos(concatenated.getChunkInfos()); - + piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 46be6e74693..4136fc5a5f2 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,9 +8,8 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}) - , input(inputs.front()) - , output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}), + input(inputs.front()), output(outputs.front()) { } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index 86675bcb237..466adf93538 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -53,11 +53,13 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - if (input.chunk.getChunkInfos().empty()) + const auto & info = input.chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); Int64 allocated_bytes = 0; - if (auto arenas_info = input.chunk.getChunkInfos().get()) + /// Will be set by AggregatingInOrderTransform during local aggregation; 
will be nullptr during merging on initiator. + if (const auto * arenas_info = typeid_cast(info.get())) allocated_bytes = arenas_info->allocated_bytes; states[source_num] = State{input.chunk, description, allocated_bytes}; @@ -134,7 +136,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -161,7 +163,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().getChunkInfos().add(std::make_shared()); + chunks.back().setChunkInfo(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index e4f22deec8d..bcf4e759024 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,22 +6,18 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfoCloneable +class MergeTreePartLevelInfo : public ChunkInfo { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) - : origin_merge_tree_part_level(part_level) - { } - MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; - + explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto part_level_info = chunk.getChunkInfos().get(); - if (part_level_info) + const auto & info = chunk.getChunkInfo(); + if (const auto * part_level_info = typeid_cast(info.get())) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git 
a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index cd347d371d9..7b2c7d82a01 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); + chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2f23f2a5c4d..a3ccccf0845 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace Poco { @@ -15,13 +14,11 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfoCloneable +struct ChunkSelectFinalIndices : public ChunkInfo { - explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); - ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; - const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. 
diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index b1b0182a113..fbb47969b2f 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. - if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) + if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index be629271736..c218f622870 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) + if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/QueryPlan/BufferChunksTransform.cpp b/src/Processors/QueryPlan/BufferChunksTransform.cpp new file mode 100644 index 00000000000..3601a68d36e --- /dev/null +++ b/src/Processors/QueryPlan/BufferChunksTransform.cpp @@ -0,0 +1,85 @@ +#include + +namespace DB +{ + +BufferChunksTransform::BufferChunksTransform( + const Block & header_, + size_t max_rows_to_buffer_, + size_t max_bytes_to_buffer_, + size_t limit_) + : IProcessor({header_}, {header_}) + , input(inputs.front()) + , output(outputs.front()) + , max_rows_to_buffer(max_rows_to_buffer_) + , max_bytes_to_buffer(max_bytes_to_buffer_) + , limit(limit_) +{ +} + +IProcessor::Status BufferChunksTransform::prepare() +{ + if (output.isFinished()) + { + chunks = {}; + input.close(); + 
return Status::Finished; + } + + if (input.isFinished() && chunks.empty()) + { + output.finish(); + return Status::Finished; + } + + if (output.canPush()) + { + input.setNeeded(); + + if (!chunks.empty()) + { + auto chunk = std::move(chunks.front()); + chunks.pop(); + + num_buffered_rows -= chunk.getNumRows(); + num_buffered_bytes -= chunk.bytes(); + + output.push(std::move(chunk)); + } + else if (input.hasData()) + { + auto chunk = pullChunk(); + output.push(std::move(chunk)); + } + } + + if (input.hasData() && (num_buffered_rows < max_rows_to_buffer || num_buffered_bytes < max_bytes_to_buffer)) + { + auto chunk = pullChunk(); + num_buffered_rows += chunk.getNumRows(); + num_buffered_bytes += chunk.bytes(); + chunks.push(std::move(chunk)); + } + + if (num_buffered_rows >= max_rows_to_buffer && num_buffered_bytes >= max_bytes_to_buffer) + { + input.setNotNeeded(); + return Status::PortFull; + } + + input.setNeeded(); + return Status::NeedData; +} + +Chunk BufferChunksTransform::pullChunk() +{ + auto chunk = input.pull(); + num_processed_rows += chunk.getNumRows(); + + if (limit && num_processed_rows >= limit) + input.close(); + + return chunk; +} + +} diff --git a/src/Processors/QueryPlan/BufferChunksTransform.h b/src/Processors/QueryPlan/BufferChunksTransform.h new file mode 100644 index 00000000000..752f9910734 --- /dev/null +++ b/src/Processors/QueryPlan/BufferChunksTransform.h @@ -0,0 +1,42 @@ +#pragma once +#include +#include + +namespace DB +{ + +/// Transform that buffers chunks from the input +/// up to a certain limit and pushes chunks to +/// the output whenever it is ready. It can be used +/// to increase parallelism of execution, for example +/// when it is added before MergingSortedTransform. +class BufferChunksTransform : public IProcessor +{ +public: + /// OR condition is used for the limits on rows and bytes. 
+ BufferChunksTransform( + const Block & header_, + size_t max_rows_to_buffer_, + size_t max_bytes_to_buffer_, + size_t limit_); + + Status prepare() override; + String getName() const override { return "BufferChunks"; } + +private: + Chunk pullChunk(); + + InputPort & input; + OutputPort & output; + + size_t max_rows_to_buffer; + size_t max_bytes_to_buffer; + size_t limit; + + std::queue chunks; + size_t num_buffered_rows = 0; + size_t num_buffered_bytes = 0; + size_t num_processed_rows = 0; +}; + +} diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index b1ab5561958..c48bdf1552a 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -46,6 +46,10 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes); /// Replace chain `FilterStep -> ExpressionStep` to single FilterStep size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &); +/// Replace chain `FilterStep -> FilterStep` to single FilterStep +/// Note: this breaks short-circuit logic, so it is disabled for now. +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &); + /// Move FilterStep down if possible. /// May split FilterStep and push down only part of it. 
size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes); @@ -81,11 +85,12 @@ size_t tryAggregatePartitionsIndependently(QueryPlan::Node * node, QueryPlan::No inline const auto & getOptimizations() { - static const std::array optimizations = {{ + static const std::array optimizations = {{ {tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::lift_up_array_join}, {tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::push_down_limit}, {trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::split_filter}, {tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::merge_expressions}, + {tryMergeFilters, "mergeFilters", &QueryPlanOptimizationSettings::merge_filters}, {tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down}, {tryConvertOuterJoinToInnerJoin, "convertOuterJoinToInnerJoin", &QueryPlanOptimizationSettings::convert_outer_join_to_inner_join}, {tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::execute_functions_after_sorting}, diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 2738de1ff5f..4d984133efd 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -20,6 +20,8 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.merge_expressions = from.query_plan_enable_optimizations && from.query_plan_merge_expressions; + settings.merge_filters = from.query_plan_enable_optimizations && from.query_plan_merge_filters; + settings.filter_push_down = from.query_plan_enable_optimizations && from.query_plan_filter_push_down; settings.convert_outer_join_to_inner_join = from.query_plan_enable_optimizations && from.query_plan_convert_outer_join_to_inner_join; diff --git 
a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 85042cea4ed..539ff2eafbb 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -31,6 +31,9 @@ struct QueryPlanOptimizationSettings /// If merge-expressions optimization is enabled. bool merge_expressions = true; + /// If merge-filters optimization is enabled. + bool merge_filters = false; + /// If filter push down optimization is enabled. bool filter_push_down = true; diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp index 6ace1b3b5ce..118abdd701f 100644 --- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp +++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp @@ -34,7 +34,6 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) auto * parent_expr = typeid_cast(parent.get()); auto * parent_filter = typeid_cast(parent.get()); auto * child_expr = typeid_cast(child.get()); - auto * child_filter = typeid_cast(child.get()); if (parent_expr && child_expr) { @@ -76,7 +75,23 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &) parent_node->children.swap(child_node->children); return 1; } - else if (parent_filter && child_filter) + + return 0; +} +size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &) +{ + if (parent_node->children.size() != 1) + return false; + + QueryPlan::Node * child_node = parent_node->children.front(); + + auto & parent = parent_node->step; + auto & child = child_node->step; + + auto * parent_filter = typeid_cast(parent.get()); + auto * child_filter = typeid_cast(child.get()); + + if (parent_filter && child_filter) { const auto & child_actions = child_filter->getExpression(); const auto & parent_actions = 
parent_filter->getExpression(); diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index 537555afa2a..e1ef38022b5 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -919,15 +919,23 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { auto & union_node = node.children.front(); - std::vector infos; + bool use_buffering = false; const SortDescription * max_sort_descr = nullptr; + + std::vector infos; infos.reserve(node.children.size()); + for (auto * child : union_node->children) { infos.push_back(buildInputOrderInfo(*sorting, *child, steps_to_update)); - if (infos.back() && (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size())) - max_sort_descr = &infos.back()->sort_description_for_merging; + if (infos.back()) + { + if (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size()) + max_sort_descr = &infos.back()->sort_description_for_merging; + + use_buffering |= infos.back()->limit == 0; + } } if (!max_sort_descr || max_sort_descr->empty()) @@ -972,12 +980,13 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) } } - sorting->convertToFinishSorting(*max_sort_descr); + sorting->convertToFinishSorting(*max_sort_descr, use_buffering); } else if (auto order_info = buildInputOrderInfo(*sorting, *node.children.front(), steps_to_update)) { - sorting->convertToFinishSorting(order_info->sort_description_for_merging); - /// update data stream's sorting properties + /// Use buffering only if have filter or don't have limit. 
+ bool use_buffering = order_info->limit == 0; + sorting->convertToFinishSorting(order_info->sort_description_for_merging, use_buffering); updateStepsDataStreams(steps_to_update); } } @@ -1091,7 +1100,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, bool can_read = read_from_merge_tree->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit); if (!can_read) return 0; - sorting->convertToFinishSorting(order_info->sort_description_for_merging); + sorting->convertToFinishSorting(order_info->sort_description_for_merging, false); } return 0; diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 8f40e523b42..1c40f84d23d 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -8,6 +7,7 @@ #include #include #include +#include #include #include @@ -38,6 +38,7 @@ SortingStep::Settings::Settings(const Context & context) tmp_data = context.getTempDataOnDisk(); min_free_disk_space = settings.min_free_disk_space_for_temporary_data; max_block_bytes = settings.prefer_external_sort_block_bytes; + read_in_order_use_buffering = settings.read_in_order_use_buffering; } SortingStep::Settings::Settings(size_t max_block_size_) @@ -153,10 +154,11 @@ void SortingStep::updateLimit(size_t limit_) } } -void SortingStep::convertToFinishSorting(SortDescription prefix_description_) +void SortingStep::convertToFinishSorting(SortDescription prefix_description_, bool use_buffering_) { type = Type::FinishSorting; prefix_description = std::move(prefix_description_); + use_buffering = use_buffering_; } void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline) @@ -244,6 +246,14 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr /// If there are several streams, then we merge them into one if 
(pipeline.getNumStreams() > 1) { + if (use_buffering && sort_settings.read_in_order_use_buffering) + { + pipeline.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, sort_settings.max_block_size, sort_settings.max_block_bytes, limit_); + }); + } + auto transform = std::make_shared( pipeline.getHeader(), pipeline.getNumStreams(), @@ -373,9 +383,8 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 0 : limit)); if (need_finish_sorting) - { finishSorting(pipeline, prefix_description, result_description, limit); - } + return; } diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index 49dcf9f3121..b4a49394a13 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -28,6 +28,7 @@ public: TemporaryDataOnDiskScopePtr tmp_data = nullptr; size_t min_free_disk_space = 0; size_t max_block_bytes = 0; + size_t read_in_order_use_buffering = 0; explicit Settings(const Context & context); explicit Settings(size_t max_block_size_); @@ -80,7 +81,7 @@ public: const SortDescription & getSortDescription() const { return result_description; } - void convertToFinishSorting(SortDescription prefix_description); + void convertToFinishSorting(SortDescription prefix_description, bool use_buffering_); Type getType() const { return type; } const Settings & getSettings() const { return sort_settings; } @@ -126,6 +127,7 @@ private: UInt64 limit; bool always_read_till_end = false; + bool use_buffering = false; Settings sort_settings; diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index c05cc1defcb..30cf958c072 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk & chunk) override { 
write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } + void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 36bb70f493f..5f9f9f9b1a1 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,8 +15,9 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk); - cur_chunk = std::move(chunk); + consume(chunk.clone()); + if (!lastBlockIsDuplicate()) + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index c728fa87b1e..023bbd8b094 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -18,7 +18,8 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk & chunk) = 0; + virtual void consume(Chunk chunk) = 0; + virtual bool lastBlockIsDuplicate() const { return false; } private: std::vector table_locks; @@ -37,7 +38,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk &) override {} + void consume(Chunk) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index 7ac460c14e2..ec0dc9609f1 100644 --- a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,10 +43,7 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - auto chunk = Chunk(res.getColumns(), res.rows()); - 
chunk.getChunkInfos().add(std::move(info)); - - return chunk; + return Chunk(res.getColumns(), res.rows(), std::move(info)); } private: diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 37a84d9fe96..a3d6fd691d8 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -193,7 +193,15 @@ PostgreSQLSource::~PostgreSQLSource() { if (stream) { + /** Internally libpqxx::stream_from runs PostgreSQL copy query `COPY query TO STDOUT`. + * During transaction abort we try to execute PostgreSQL `ROLLBACK` command and if + * copy query is not cancelled, we wait until it finishes. + */ tx->conn().cancel_query(); + + /** If stream is not closed, libpqxx::stream_from closes stream in destructor, but that way + * exception is added into transaction pending error and we can potentially ignore exception message. + */ stream->close(); } diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 1578bd389c9..3d7dd3f76b8 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 9abe0504d10..00f40a34361 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,9 +5,7 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) -{ -} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), 
chunk(std::move(chunk_)) {} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { @@ -22,7 +20,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 45b0960ec8f..9ffe15d0f85 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. - to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); + to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,12 +351,11 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - { finalizeChunk(chunk, aggregates_mask); - } - else if (!chunk.getChunkInfos().get()) + else if (!chunk.getChunkInfo()) { - chunk.getChunkInfos().add(std::make_shared()); + auto info = std::make_shared(); + chunk.setChunkInfo(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 41a0d7fc7f1..5d50e97f552 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB { @@ -13,12 +12,10 @@ namespace DB struct InputOrderInfo; using 
InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable +struct ChunkInfoWithAllocatedBytes : public ChunkInfo { - ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} - Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 517f035667f..cdbe194cfac 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } @@ -44,11 +44,15 @@ namespace { const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info.get(); + return agg_info; } /// Reads chunks from file in native format. Provide chunks with aggregation info. 
@@ -206,7 +210,11 @@ private: void process(Chunk && chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + if (!chunk.hasChunkInfo()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); + + const auto & info = chunk.getChunkInfo(); + const auto * chunks_to_merge = typeid_cast(info.get()); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); @@ -775,7 +783,7 @@ void AggregatingTransform::initGenerate() { /// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes` static constexpr size_t oneMB = 1024 * 1024; - return std::make_shared(header, params->params.max_block_size, oneMB); + return std::make_shared(header, params->params.max_block_size, oneMB); }); } /// AggregatingTransform::expandPipeline expects single output port. diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 95983c39d1e..e167acde067 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -20,7 +19,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfoCloneable +class AggregatedChunkInfo : public ChunkInfo { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 49a6581e685..965a084bb13 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -27,12 +27,18 @@ public: } ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } } protected: void onConsume(Chunk chunk) override { - cur_chunk = Squashing::squash(std::move(chunk)); + if (auto res_chunk = 
DB::Squashing::squash(std::move(chunk))) + cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); } GenerateResult onGenerate() override @@ -42,10 +48,16 @@ protected: res.is_done = true; return res; } + void onFinish() override + { + auto chunk = DB::Squashing::squash({}); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + } private: Squashing squashing; Chunk cur_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 2c6b3bd8638..3dfb9fe178f 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,7 +1,6 @@ -#include -#include #include +#include #include #include diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp deleted file mode 100644 index 6786f76cbef..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ /dev/null @@ -1,236 +0,0 @@ -#include - -#include - -#include -#include -#include - - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -void RestoreChunkInfosTransform::transform(Chunk & chunk) -{ - chunk.getChunkInfos().append(chunk_infos.clone()); -} - -namespace DeduplicationToken -{ - -String TokenInfo::getToken() const -{ - if (!isDefined()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken()); - - return getTokenImpl(); -} - -String TokenInfo::getTokenImpl() const -{ - String result; - result.reserve(getTotalSize()); - - for (const auto & part : parts) - { - if (!result.empty()) - result.append(":"); - result.append(part); - } - - return result; -} - -String TokenInfo::debugToken() const -{ - return getTokenImpl(); -} - -void TokenInfo::addChunkHash(String part) -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if (stage 
!= DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(std::move(part)); -} - -void TokenInfo::finishChunkHashes() -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_WITH_HASHES; - - if (stage != DEFINE_SOURCE_WITH_HASHES) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - stage = DEFINED; -} - -void TokenInfo::setUserToken(const String & token) -{ - if (stage == UNDEFINED && empty()) - stage = DEFINE_SOURCE_USER_TOKEN; - - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("user-token-{}", token)); -} - -void TokenInfo::setSourceWithUserToken(size_t block_number) -{ - if (stage != DEFINE_SOURCE_USER_TOKEN) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("source-number-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::setViewID(const String & id) -{ - if (stage == DEFINED) - stage = DEFINE_VIEW; - - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-id-{}", id)); -} - -void TokenInfo::setViewBlockNumber(size_t block_number) -{ - if (stage != DEFINE_VIEW) - throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); - - addTokenPart(fmt::format("view-block-{}", block_number)); - - stage = DEFINED; -} - -void TokenInfo::reset() -{ - stage = UNDEFINED; - parts.clear(); -} - -void TokenInfo::addTokenPart(String part) -{ - parts.push_back(std::move(part)); -} - -size_t TokenInfo::getTotalSize() const -{ - if (parts.empty()) - return 0; - - size_t size = 0; - for (const auto & part : parts) - size += part.size(); - - // 
we reserve more size here to be able to add delimenter between parts. - return size + parts.size() - 1; -} - -#ifdef ABORT_ON_LOGICAL_ERROR -void CheckTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); - - LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); -} -#endif - -String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) -{ - SipHash hash; - for (const auto & colunm : chunk.getColumns()) - colunm->updateHashFast(hash); - - const auto hash_value = hash.get128(); - return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); -} - - -void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); - - if (token_info->isDefined()) - return; - - token_info->addChunkHash(getChunkHash(chunk)); - token_info->finishChunkHashes(); -} - -void SetUserTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); - token_info->setUserToken(user_token); -} - -void SetSourceBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); - token_info->setSourceWithUserToken(block_number++); -} - -void SetViewIDTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk 
in SetViewIDTransform"); - token_info->setViewID(view_id); -} - -void SetViewBlockNumberTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); - token_info->setViewBlockNumber(block_number++); -} - -void ResetTokenTransform::transform(Chunk & chunk) -{ - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ResetTokenTransform"); - - token_info->reset(); -} - -} -} diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h deleted file mode 100644 index d6aff9e1370..00000000000 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.h +++ /dev/null @@ -1,237 +0,0 @@ -#pragma once - -#include -#include - -#include -#include "Common/Logger.h" - - -namespace DB -{ - class RestoreChunkInfosTransform : public ISimpleTransform - { - public: - RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , chunk_infos(std::move(chunk_infos_)) - {} - - String getName() const override { return "RestoreChunkInfosTransform"; } - - void transform(Chunk & chunk) override; - - private: - Chunk::ChunkInfoCollection chunk_infos; - }; - - -namespace DeduplicationToken -{ - class TokenInfo : public ChunkInfoCloneable - { - public: - TokenInfo() = default; - TokenInfo(const TokenInfo & other) = default; - - String getToken() const; - String debugToken() const; - - bool empty() const { return parts.empty(); } - - bool isDefined() const { return stage == DEFINED; } - - void addChunkHash(String part); - void finishChunkHashes(); - - void setUserToken(const String & token); - void setSourceWithUserToken(size_t block_number); - - void setViewID(const String & 
id); - void setViewBlockNumber(size_t block_number); - - void reset(); - - private: - String getTokenImpl() const; - - void addTokenPart(String part); - size_t getTotalSize() const; - - /* Token has to be prepared in a particular order. - * BuildingStage ensures that token is expanded according the following order. - * Firstly token is expanded with information about the source. - * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash - * DEFINE_SOURCE_WITH_HASHES -> DEFINED // defineSourceWithChankHashes - * - * transition // method - * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken - * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken - * - * After token is defined, it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. 
- * - * transition // method - * DEFINED -> DEFINE_VIEW // setViewID - * DEFINE_VIEW -> DEFINED // defineViewID - */ - - enum BuildingStage - { - UNDEFINED, - DEFINE_SOURCE_WITH_HASHES, - DEFINE_SOURCE_USER_TOKEN, - DEFINE_VIEW, - DEFINED, - }; - - BuildingStage stage = UNDEFINED; - std::vector parts; - }; - - -#ifdef ABORT_ON_LOGICAL_ERROR - /// use that class only with debug builds in CI for introspection - class CheckTokenTransform : public ISimpleTransform - { - public: - CheckTokenTransform(String debug_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , debug(std::move(debug_)) - { - } - - String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String debug; - LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); - }; -#endif - - - class AddTokenInfoTransform : public ISimpleTransform - { - public: - explicit AddTokenInfoTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } - - void transform(Chunk & chunk) override - { - chunk.getChunkInfos().add(std::make_shared()); - } - }; - - - class DefineSourceWithChunkHashTransform : public ISimpleTransform - { - public: - explicit DefineSourceWithChunkHashTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } - - // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts. 
- // But if there is some table with different engine, we still need to define the source of the data in deduplication token - // We use that transform to define the source as a hash of entire block in deduplication token - void transform(Chunk & chunk) override; - - static String getChunkHash(const Chunk & chunk); - }; - - class ResetTokenTransform : public ISimpleTransform - { - public: - explicit ResetTokenTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } - - void transform(Chunk & chunk) override; - }; - - - class SetUserTokenTransform : public ISimpleTransform - { - public: - SetUserTokenTransform(String user_token_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , user_token(std::move(user_token_)) - { - } - - String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } - - void transform(Chunk & chunk) override; - - private: - String user_token; - }; - - - class SetSourceBlockNumberTransform : public ISimpleTransform - { - public: - explicit SetSourceBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - - - class SetViewIDTransform : public ISimpleTransform - { - public: - SetViewIDTransform(String view_id_, const Block & header_) - : ISimpleTransform(header_, header_, true) - , view_id(std::move(view_id_)) - { - } - - String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } - - void transform(Chunk & chunk) override; - - private: - String view_id; - }; - - - class SetViewBlockNumberTransform : public ISimpleTransform - { - public: - explicit SetViewBlockNumberTransform(const Block & header_) - : ISimpleTransform(header_, 
header_, true) - { - } - - String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } - - void transform(Chunk & chunk) override; - - private: - size_t block_number = 0; - }; - -} -} diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 04fabc9a3c6..2fbd2c21b8d 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,7 +1,5 @@ #include #include - - namespace DB { diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index ca204bcb482..3e2a9462e54 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,9 +365,10 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - task = data.chunk.getChunkInfos().get(); - if (!task) + if (!data.chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); + + task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -478,7 +479,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.getChunkInfos().add(std::make_shared()); + chunk.setChunkInfo(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -495,7 +496,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.getChunkInfos().add(std::move(task)); + chunk.setChunkInfo(task); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index 5f6d9d6fff2..a308af03662 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ 
-1,7 +1,6 @@ #pragma once #include -#include -#include + namespace DB { @@ -112,12 +111,11 @@ private: }; -class DelayedBlocksTask : public ChunkInfoCloneable +class DelayedBlocksTask : public ChunkInfo { public: DelayedBlocksTask() = default; - DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 9ae80e21a68..1eaa5458d37 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,7 +1,6 @@ #include #include - namespace DB { diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index d7bc320173b..607087fb39c 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,7 +150,11 @@ private: if (!chunk.hasRows()) return; - const auto & agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index ea9ebb0f96e..fc40c6894bb 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ 
b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_shared(std::move(chunks)); + info->chunks = std::make_unique(std::move(chunks)); Chunk chunk; - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); output.push(std::move(chunk)); } @@ -255,10 +255,11 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { Int32 bucket = agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -274,7 +275,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -303,11 +304,7 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - - auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); - chunk.getChunkInfos().add(std::move(chunk_info)); - - chunks_map[bucket].emplace_back(std::move(chunk)); + chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); } } } @@ -322,7 +319,9 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - auto chunks_to_merge = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); 
+ const auto * chunks_to_merge = typeid_cast(info.get()); + if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -331,10 +330,11 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - if (cur_chunk.getChunkInfos().empty()) + const auto & cur_info = cur_chunk.getChunkInfo(); + if (!cur_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (auto agg_info = cur_chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (cur_chunk.getChunkInfos().get()) + else if (typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.getChunkInfos().add(std::move(res_info)); + chunk.setChunkInfo(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,7 +405,11 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - auto agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); + + const auto * agg_info = typeid_cast(info.get()); if 
(!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 3a3c1bd9c1e..77ee3034ffc 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -143,9 +142,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfoCloneable +struct ChunksToMerge : public ChunkInfo { - std::shared_ptr chunks; + std::unique_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index 446e60a0b81..ad723da7527 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,10 +32,11 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - if (chunk.getChunkInfos().empty()) + const auto & info = chunk.getChunkInfo(); + if (!info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (auto agg_info = chunk.getChunkInfos().get()) + if (const auto * agg_info = typeid_cast(info.get())) { /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. 
@@ -48,7 +49,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (chunk.getChunkInfos().get()) + else if (typeid_cast(info.get())) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -88,8 +89,7 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - - chunk.getChunkInfos().add(std::move(info)); + chunk.setChunkInfo(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index ee4dfa6a64e..0f433165f14 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -10,20 +10,20 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header_, header_) - , squashing(header_, min_block_size_rows, min_block_size_bytes) + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - squashed_chunk = squashing.add(std::move(chunk)); + if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) + squashed_chunk.swap(current_chunk); } Chunk PlanSquashingTransform::generate() { - if (!squashed_chunk) + if (!squashed_chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -33,11 +33,12 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return bool(squashed_chunk); + return squashed_chunk.hasChunkInfo(); } Chunk PlanSquashingTransform::getRemaining() { 
- return squashing.flush(); + Chunk current_chunk = squashing.flush(); + return current_chunk; } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index e6db245499e..4ad2ec2d089 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } @@ -23,6 +23,7 @@ protected: private: Squashing squashing; Chunk squashed_chunk; + Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index b44f5a3203e..480ab1a0f61 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - auto select_final_indices_info = chunk.getChunkInfos().extract(); + const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,6 +41,7 @@ public: chunk.setColumns(std::move(columns), index_column->size()); } + chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 1fb4433240a..b5a40c75c5b 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -7,6 +7,7 @@ namespace DB namespace ErrorCodes { extern const int 
LOGICAL_ERROR; +extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } SquashingTransform::SquashingTransform( @@ -18,7 +19,9 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); + Chunk planned_chunk = squashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -31,7 +34,10 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - finish_chunk = Squashing::squash(squashing.flush()); + Chunk chunk = squashing.flush(); + if (chunk.hasChunkInfo()) + chunk = DB::Squashing::squash(std::move(chunk)); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } void SquashingTransform::work() @@ -44,7 +50,6 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); - if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -52,49 +57,170 @@ void SquashingTransform::work() } } -SimpleSquashingTransform::SimpleSquashingTransform( +SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } -void SimpleSquashingTransform::transform(Chunk & chunk) +void SimpleSquashingChunksTransform::consume(Chunk chunk) { - if (!finished) - { - chunk = Squashing::squash(squashing.add(std::move(chunk))); - } - else - { - if (chunk.hasRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - - chunk = Squashing::squash(squashing.flush()); - } + Block current_block = 
squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } -IProcessor::Status SimpleSquashingTransform::prepare() +Chunk SimpleSquashingChunksTransform::generate() { - if (!finished && input.isFinished()) + if (squashed_chunk.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); + + return std::move(squashed_chunk); +} + +bool SimpleSquashingChunksTransform::canGenerate() +{ + return !squashed_chunk.empty(); +} + +Chunk SimpleSquashingChunksTransform::getRemaining() +{ + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + return std::move(squashed_chunk); +} + +SquashingLegacy::SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) +{ +} + +Block SquashingLegacy::add(Block && input_block) +{ + return addImpl(std::move(input_block)); +} + +Block SquashingLegacy::add(const Block & input_block) +{ + return addImpl(input_block); +} + +/* + * To minimize copying, accept two types of argument: const reference for output + * stream, and rvalue reference for input stream, and decide whether to copy + * inside this function. This allows us not to copy Block unless we absolutely + * have to. + */ +template +Block SquashingLegacy::addImpl(ReferenceType input_block) +{ + /// End of input stream. + if (!input_block) { - if (output.isFinished()) - return Status::Finished; + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } - if (!output.canPush()) - return Status::PortFull; - - if (has_output) + /// Just read block is already enough. + if (isEnoughSize(input_block)) + { + /// If no accumulated data, return just read block. 
+ if (!accumulated_block) { - output.pushData(std::move(output_data)); - has_output = false; - return Status::PortFull; + return std::move(input_block); } - finished = true; - /// On the next call to transform() we will return all data buffered in `squashing` (if any) - return Status::Ready; + /// Return accumulated data (maybe it has small size) and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; } - return ISimpleTransform::prepare(); + + /// Accumulated block is already enough. + if (isEnoughSize(accumulated_block)) + { + /// Return accumulated data and place new block to accumulated data. + Block to_return = std::move(input_block); + std::swap(to_return, accumulated_block); + return to_return; + } + + append(std::move(input_block)); + if (isEnoughSize(accumulated_block)) + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } + + /// Squashed block is not ready. + return {}; } + + +template +void SquashingLegacy::append(ReferenceType input_block) +{ + if (!accumulated_block) + { + accumulated_block = std::move(input_block); + return; + } + + assert(blocksHaveEqualStructure(input_block, accumulated_block)); + + try + { + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_block.getByPosition(i).column; + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = std::move(mutable_column); + } + } + catch (...) + { + /// add() may be called again even after a previous add() threw an exception. + /// Keep accumulated_block in a valid state. 
+ /// Seems ok to discard accumulated data because we're throwing an exception, which the caller will + /// hopefully interpret to mean "this block and all *previous* blocks are potentially lost". + accumulated_block.clear(); + throw; + } +} + + +bool SquashingLegacy::isEnoughSize(const Block & block) +{ + size_t rows = 0; + size_t bytes = 0; + + for (const auto & [column, type, name] : block) + { + if (!column) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block."); + + if (!rows) + rows = column->size(); + else if (rows != column->size()) + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); + + bytes += column->byteSize(); + } + + return isEnoughSize(rows, bytes); +} + + +bool SquashingLegacy::isEnoughSize(size_t rows, size_t bytes) const +{ + return (!min_block_size_rows && !min_block_size_bytes) + || (min_block_size_rows && rows >= min_block_size_rows) + || (min_block_size_bytes && bytes >= min_block_size_bytes); +} + + } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index c5b727ac6ec..452317e7d5e 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -29,22 +30,51 @@ private: Chunk finish_chunk; }; -/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. -class SimpleSquashingTransform : public ISimpleTransform + +class SquashingLegacy { public: - explicit SimpleSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. + SquashingLegacy(size_t min_block_size_rows_, size_t min_block_size_bytes_); + + /** Add next block and possibly returns squashed block. 
+ * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. + */ + Block add(Block && block); + Block add(const Block & block); + +private: + size_t min_block_size_rows; + size_t min_block_size_bytes; + + Block accumulated_block; + + template + Block addImpl(ReferenceType block); + + template + void append(ReferenceType block); + + bool isEnoughSize(const Block & block); + bool isEnoughSize(size_t rows, size_t bytes) const; +}; + +class SimpleSquashingChunksTransform : public IInflatingTransform +{ +public: + explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "SimpleSquashingTransform"; } protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; + void consume(Chunk chunk) override; + bool canGenerate() override; + Chunk generate() override; + Chunk getRemaining() override; private: - Squashing squashing; - - bool finished = false; + SquashingLegacy squashing; + Chunk squashed_chunk; }; + } diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 59fceccb538..aa86879e62c 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -150,7 +150,11 @@ void TotalsHavingTransform::transform(Chunk & chunk) /// Block with values not included in `max_rows_to_group_by`. We'll postpone it. 
if (overflow_row) { - const auto & agg_info = chunk.getChunkInfos().get(); + const auto & info = chunk.getChunkInfo(); + if (!info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); + + const auto * agg_info = typeid_cast(info.get()); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 312b333ab33..25fbf13b0e7 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,9 +5,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -18,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -27,12 +24,9 @@ #include #include #include -#include "base/defines.h" -#include #include #include -#include namespace ProfileEvents @@ -111,7 +105,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -122,7 +116,6 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; - bool disable_deduplication_for_children; struct State { @@ -145,7 +138,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: 
StorageLiveView & live_view; @@ -159,7 +152,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; private: StorageWindowView & window_view; @@ -223,10 +216,45 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); + // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); } + else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && + !insert_settings.insert_deduplication_token.value.empty()) + { + /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle + * deduplication in complex INSERT flows. + * + * Example: + * + * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + * | | + * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ + * + * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will + * be inserted into `ds_2_1`. + * + * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables + * are involved. 
+ * + * Example: + * + * landing -┬--> mv_1_1 --┬-> ds_1_1 + * | | + * └--> mv_1_2 --┘ + * + */ + auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; + + if (view_id.hasUUID()) + insert_deduplication_token += "_" + toString(view_id.uuid); + else + insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); + + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); + } // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) @@ -333,13 +361,7 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter( - nullptr, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; @@ -356,10 +378,6 @@ std::optional generateViewChain( table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); } -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Before squashing", out.getInputHeader())); -#endif - auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); counting->setProgressCallback(insert_context->getProgressCallback()); @@ -402,19 +420,11 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right after Inner query", out.getInputHeader())); -#endif - auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); + storage_header, views_data->views.back(), views_data); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); - -#ifdef ABORT_ON_LOGICAL_ERROR - out.addSource(std::make_shared("Right before Inner query", out.getInputHeader())); -#endif } return out; @@ -455,7 +465,11 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views; + /// If the "root" table deduplicates blocks, there are no need to make deduplication for children + /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks + bool disable_deduplication_for_children = false; + if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); @@ -546,25 
+560,12 @@ Chain buildPushingToViewsChain( auto sink = std::make_shared(live_view_header, *live_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); - } - else if (dynamic_cast(storage.get())) - { - auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); - metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); - sink->setRuntimeData(thread_status, elapsed_counter_ms); - result_chain.addSource(std::move(sink)); - - result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -572,15 +573,8 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); - - result_chain.addSource(std::make_shared(sink->getHeader())); - result_chain.addSource(std::move(sink)); } - else - { - result_chain.addSource(std::make_shared(storage_header)); - } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -596,7 +590,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { const auto & 
context = view.context; @@ -643,19 +637,6 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); - pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); - - if (!disable_deduplication_for_children) - { - String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); - pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - else - { - pipeline.addTransform(std::make_shared(pipeline.getHeader())); - } - return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -747,19 +728,17 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_, - bool disable_deduplication_for_children_) + std::shared_ptr views_data_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) - , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); - state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); + state.emplace(process(block, view, *views_data)); } @@ -791,10 +770,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk & chunk) +void PushingToLiveViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(live_view, 
getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -814,11 +793,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk & chunk) +void PushingToWindowViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index a9e5b1535c0..f0b2ead687e 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); } + void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index bb2be2c8ffb..2cd4dc42a83 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept +QueryPlanResourceHolder & 
QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,12 +16,6 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolde return *this; } -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept -{ - append(std::move(rhs)); - return *this; -} - QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index 10f7f39ab09..ed9eb68b7ba 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -20,11 +20,8 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); - QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; - /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; - QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. 
/// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 1686a101bde..bde8ce78f55 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -105,7 +105,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( connection_entries.emplace_back(std::move(result.entry)); } - auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); + auto res = std::make_unique(std::move(connection_entries), context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); @@ -127,7 +127,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, &connection, throttler, extension_](AsyncCallback) { - auto res = std::make_unique(connection, context->getSettingsRef(), throttler); + auto res = std::make_unique(connection, context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -148,7 +148,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, connection_ptr, throttler, extension_](AsyncCallback) { - auto res = std::make_unique(connection_ptr, context->getSettingsRef(), throttler); + auto res = std::make_unique(connection_ptr, context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -169,7 +169,7 @@ RemoteQueryExecutor::RemoteQueryExecutor( { create_connections = [this, connections_, throttler, extension_](AsyncCallback) mutable { - auto res = std::make_unique(std::move(connections_), context->getSettingsRef(), throttler); + auto res = std::make_unique(std::move(connections_), context, throttler); if (extension_ && extension_->replica_info) res->setReplicaInfo(*extension_->replica_info); return res; @@ -234,7 +234,7 @@ 
RemoteQueryExecutor::RemoteQueryExecutor( timeouts, current_settings, pool_mode, std::move(async_callback), skip_unavailable_endpoints, priority_func); } - auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); + auto res = std::make_unique(std::move(connection_entries), context, throttler); if (extension && extension->replica_info) res->setReplicaInfo(*extension->replica_info); return res; diff --git a/src/Server/HTTP/authenticateUserByHTTP.cpp b/src/Server/HTTP/authenticateUserByHTTP.cpp new file mode 100644 index 00000000000..ac43bfd64c0 --- /dev/null +++ b/src/Server/HTTP/authenticateUserByHTTP.cpp @@ -0,0 +1,265 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if USE_SSL +#include +#endif + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int AUTHENTICATION_FAILED; + extern const int BAD_ARGUMENTS; + extern const int SUPPORT_IS_DISABLED; +} + + +namespace +{ + /// Throws an exception that multiple authorization schemes are used simultaneously. + [[noreturn]] void throwMultipleAuthenticationMethods(std::string_view method1, std::string_view method2) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: it is not allowed to use {} and {} simultaneously", method1, method2); + } + + /// Checks that a specified user name is not empty, and throws an exception if it's empty. 
+ void checkUserNameNotEmpty(const String & user_name, std::string_view method) + { + if (user_name.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Got an empty user name from {}", method); + } +} + + +bool authenticateUserByHTTP( + const HTTPServerRequest & request, + const HTMLForm & params, + HTTPServerResponse & response, + Session & session, + std::unique_ptr & request_credentials, + ContextPtr global_context, + LoggerPtr log) +{ + /// Get the credentials created by the previous call of authenticateUserByHTTP() while handling the previous HTTP request. + auto current_credentials = std::move(request_credentials); + + /// The user and password can be passed by headers (similar to X-Auth-*), + /// which is used by load balancers to pass authentication information. + std::string user = request.get("X-ClickHouse-User", ""); + std::string password = request.get("X-ClickHouse-Key", ""); + std::string quota_key = request.get("X-ClickHouse-Quota", ""); + bool has_auth_headers = !user.empty() || !password.empty(); + + /// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name + /// extracted from the SSL certificate used for this connection instead of checking password. + bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on"); + + /// User name and password can be passed using HTTP Basic auth or query parameters + /// (both methods are insecure). + bool has_http_credentials = request.hasCredentials(); + bool has_credentials_in_query_params = params.has("user") || params.has("password"); + + std::string spnego_challenge; + SSLCertificateSubjects certificate_subjects; + + if (has_ssl_certificate_auth) + { +#if USE_SSL + /// For SSL certificate authentication we extract the user name from the "X-ClickHouse-User" HTTP header. + checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers"); + + /// It is prohibited to mix different authorization schemes. 
+ if (!password.empty()) + throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via password"); + if (has_http_credentials) + throwMultipleAuthenticationMethods("SSL certificate authentication", "Authorization HTTP header"); + if (has_credentials_in_query_params) + throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via parameters"); + + if (request.havePeerCertificate()) + certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate()); + + if (certificate_subjects.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "SSL certificate authentication disabled because ClickHouse was built without SSL library"); +#endif + } + else if (has_auth_headers) + { + checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers"); + + /// It is prohibited to mix different authorization schemes. + if (has_http_credentials) + throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "Authorization HTTP header"); + if (has_credentials_in_query_params) + throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "authentication via parameters"); + } + else if (has_http_credentials) + { + /// It is prohibited to mix different authorization schemes. 
+ if (has_credentials_in_query_params) + throwMultipleAuthenticationMethods("Authorization HTTP header", "authentication via parameters"); + + std::string scheme; + std::string auth_info; + request.getCredentials(scheme, auth_info); + + if (Poco::icompare(scheme, "Basic") == 0) + { + Poco::Net::HTTPBasicCredentials credentials(auth_info); + user = credentials.getUsername(); + password = credentials.getPassword(); + checkUserNameNotEmpty(user, "Authorization HTTP header"); + } + else if (Poco::icompare(scheme, "Negotiate") == 0) + { + spnego_challenge = auth_info; + + if (spnego_challenge.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); + } + else + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); + } + } + else + { + /// If the user name is not set we assume it's the 'default' user. + user = params.get("user", "default"); + password = params.get("password", ""); + checkUserNameNotEmpty(user, "authentication via parameters"); + } + + if (!certificate_subjects.empty()) + { + chassert(!user.empty()); + if (!current_credentials) + current_credentials = std::make_unique(user, std::move(certificate_subjects)); + + auto * certificate_credentials = dynamic_cast(current_credentials.get()); + if (!certificate_credentials) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme"); + } + else if (!spnego_challenge.empty()) + { + if (!current_credentials) + current_credentials = global_context->makeGSSAcceptorContext(); + + auto * gss_acceptor_context = dynamic_cast(current_credentials.get()); + if (!gss_acceptor_context) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected"); + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + const auto 
spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log)); +#pragma clang diagnostic pop + + if (!spnego_response.empty()) + response.set("WWW-Authenticate", "Negotiate " + spnego_response); + + if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady()) + { + if (spnego_response.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure"); + + response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + response.send(); + /// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request. + request_credentials = std::move(current_credentials); + return false; + } + } + else // I.e., now using user name and password strings ("Basic"). + { + if (!current_credentials) + current_credentials = std::make_unique(); + + auto * basic_credentials = dynamic_cast(current_credentials.get()); + if (!basic_credentials) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme"); + + chassert(!user.empty()); + basic_credentials->setUserName(user); + basic_credentials->setPassword(password); + } + + if (params.has("quota_key")) + { + if (!quota_key.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid authentication: it is not allowed " + "to use quota key as HTTP header and as parameter simultaneously"); + + quota_key = params.get("quota_key"); + } + + /// Set client info. It will be used for quota accounting parameters in 'setUser' method. + + session.setHTTPClientInfo(request); + session.setQuotaClientKey(quota_key); + + /// Extract the last entry from comma separated list of forwarded_for addresses. + /// Only the last proxy can be trusted (if any). 
+ String forwarded_address = session.getClientInfo().getLastForwardedFor(); + try + { + if (!forwarded_address.empty() && global_context->getConfigRef().getBool("auth_use_forwarded_address", false)) + session.authenticate(*current_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port())); + else + session.authenticate(*current_credentials, request.clientAddress()); + } + catch (const Authentication::Require & required_credentials) + { + current_credentials = std::make_unique(); + + if (required_credentials.getRealm().empty()) + response.set("WWW-Authenticate", "Basic"); + else + response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\""); + + response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + response.send(); + /// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request. + request_credentials = std::move(current_credentials); + return false; + } + catch (const Authentication::Require & required_credentials) + { + current_credentials = global_context->makeGSSAcceptorContext(); + + if (required_credentials.getRealm().empty()) + response.set("WWW-Authenticate", "Negotiate"); + else + response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\""); + + response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); + response.send(); + /// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request. 
+ request_credentials = std::move(current_credentials); + return false; + } + + return true; +} + +} diff --git a/src/Server/HTTP/authenticateUserByHTTP.h b/src/Server/HTTP/authenticateUserByHTTP.h new file mode 100644 index 00000000000..3b5a04cae68 --- /dev/null +++ b/src/Server/HTTP/authenticateUserByHTTP.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class HTTPServerRequest; +class HTMLForm; +class HTTPServerResponse; +class Session; +class Credentials; + +/// Authenticates a user via HTTP protocol and initializes a session. +/// Usually retrieves the name and the password for that user from either the request's headers or from the query parameters. +/// Returns true when the user successfully authenticated, +/// the session instance will be configured accordingly, and the request_credentials instance will be dropped. +/// Returns false when the user is not authenticated yet, and the HTTP_UNAUTHORIZED response is sent with the "WWW-Authenticate" header, +/// in this case the `request_credentials` instance must be preserved until the next request or until any exception. +/// Throws an exception if authentication failed. 
+bool authenticateUserByHTTP( + const HTTPServerRequest & request, + const HTMLForm & params, + HTTPServerResponse & response, + Session & session, + std::unique_ptr & request_credentials, + ContextPtr global_context, + LoggerPtr log); + +} diff --git a/src/Server/HTTP/exceptionCodeToHTTPStatus.cpp b/src/Server/HTTP/exceptionCodeToHTTPStatus.cpp new file mode 100644 index 00000000000..6de57217aac --- /dev/null +++ b/src/Server/HTTP/exceptionCodeToHTTPStatus.cpp @@ -0,0 +1,158 @@ +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int CANNOT_PARSE_TEXT; + extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; + extern const int CANNOT_PARSE_QUOTED_STRING; + extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_PARSE_DATETIME; + extern const int CANNOT_PARSE_NUMBER; + extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; + extern const int CANNOT_PARSE_UUID; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int CANNOT_SCHEDULE_TASK; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_COMPILE_REGEXP; + extern const int DUPLICATE_COLUMN; + extern const int ILLEGAL_COLUMN; + extern const int THERE_IS_NO_COLUMN; + extern const int UNKNOWN_ELEMENT_IN_AST; + extern const int UNKNOWN_TYPE_OF_AST_NODE; + extern const int TOO_DEEP_AST; + extern const int TOO_BIG_AST; + extern const int UNEXPECTED_AST_STRUCTURE; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; + + extern const int SYNTAX_ERROR; + + extern const int INCORRECT_DATA; + extern const int TYPE_MISMATCH; + + extern const int UNKNOWN_TABLE; + extern const int UNKNOWN_FUNCTION; + extern const int UNKNOWN_IDENTIFIER; + extern const int UNKNOWN_TYPE; + extern const int UNKNOWN_STORAGE; + extern const int UNKNOWN_DATABASE; + extern const int UNKNOWN_SETTING; + extern const int UNKNOWN_DIRECTION_OF_SORTING; + extern const int UNKNOWN_AGGREGATE_FUNCTION; + 
extern const int UNKNOWN_FORMAT; + extern const int UNKNOWN_DATABASE_ENGINE; + extern const int UNKNOWN_TYPE_OF_QUERY; + extern const int UNKNOWN_ROLE; + + extern const int QUERY_IS_TOO_LARGE; + + extern const int NOT_IMPLEMENTED; + extern const int SOCKET_TIMEOUT; + + extern const int UNKNOWN_USER; + extern const int WRONG_PASSWORD; + extern const int REQUIRED_PASSWORD; + extern const int AUTHENTICATION_FAILED; + extern const int SET_NON_GRANTED_ROLE; + + extern const int HTTP_LENGTH_REQUIRED; + + extern const int TIMEOUT_EXCEEDED; +} + + +Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code) +{ + using namespace Poco::Net; + + if (exception_code == ErrorCodes::REQUIRED_PASSWORD) + { + return HTTPResponse::HTTP_UNAUTHORIZED; + } + else if (exception_code == ErrorCodes::UNKNOWN_USER || + exception_code == ErrorCodes::WRONG_PASSWORD || + exception_code == ErrorCodes::AUTHENTICATION_FAILED || + exception_code == ErrorCodes::SET_NON_GRANTED_ROLE) + { + return HTTPResponse::HTTP_FORBIDDEN; + } + else if (exception_code == ErrorCodes::BAD_ARGUMENTS || + exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP || + exception_code == ErrorCodes::CANNOT_PARSE_TEXT || + exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE || + exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING || + exception_code == ErrorCodes::CANNOT_PARSE_DATE || + exception_code == ErrorCodes::CANNOT_PARSE_DATETIME || + exception_code == ErrorCodes::CANNOT_PARSE_NUMBER || + exception_code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING || + exception_code == ErrorCodes::CANNOT_PARSE_IPV4 || + exception_code == ErrorCodes::CANNOT_PARSE_IPV6 || + exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || + exception_code == ErrorCodes::CANNOT_PARSE_UUID || + exception_code == ErrorCodes::DUPLICATE_COLUMN || + exception_code == ErrorCodes::ILLEGAL_COLUMN || + exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST || + exception_code == 
ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE || + exception_code == ErrorCodes::THERE_IS_NO_COLUMN || + exception_code == ErrorCodes::TOO_DEEP_AST || + exception_code == ErrorCodes::TOO_BIG_AST || + exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE || + exception_code == ErrorCodes::SYNTAX_ERROR || + exception_code == ErrorCodes::INCORRECT_DATA || + exception_code == ErrorCodes::TYPE_MISMATCH || + exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE) + { + return HTTPResponse::HTTP_BAD_REQUEST; + } + else if (exception_code == ErrorCodes::UNKNOWN_TABLE || + exception_code == ErrorCodes::UNKNOWN_FUNCTION || + exception_code == ErrorCodes::UNKNOWN_IDENTIFIER || + exception_code == ErrorCodes::UNKNOWN_TYPE || + exception_code == ErrorCodes::UNKNOWN_STORAGE || + exception_code == ErrorCodes::UNKNOWN_DATABASE || + exception_code == ErrorCodes::UNKNOWN_SETTING || + exception_code == ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING || + exception_code == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION || + exception_code == ErrorCodes::UNKNOWN_FORMAT || + exception_code == ErrorCodes::UNKNOWN_DATABASE_ENGINE || + exception_code == ErrorCodes::UNKNOWN_TYPE_OF_QUERY || + exception_code == ErrorCodes::UNKNOWN_ROLE) + { + return HTTPResponse::HTTP_NOT_FOUND; + } + else if (exception_code == ErrorCodes::QUERY_IS_TOO_LARGE) + { + return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE; + } + else if (exception_code == ErrorCodes::NOT_IMPLEMENTED) + { + return HTTPResponse::HTTP_NOT_IMPLEMENTED; + } + else if (exception_code == ErrorCodes::SOCKET_TIMEOUT || + exception_code == ErrorCodes::CANNOT_OPEN_FILE) + { + return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; + } + else if (exception_code == ErrorCodes::HTTP_LENGTH_REQUIRED) + { + return HTTPResponse::HTTP_LENGTH_REQUIRED; + } + else if (exception_code == ErrorCodes::TIMEOUT_EXCEEDED) + { + return HTTPResponse::HTTP_REQUEST_TIMEOUT; + } + else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK) + { + return 
HTTPResponse::HTTP_SERVICE_UNAVAILABLE; + } + + return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR; +} + +} diff --git a/src/Server/HTTP/exceptionCodeToHTTPStatus.h b/src/Server/HTTP/exceptionCodeToHTTPStatus.h new file mode 100644 index 00000000000..aadec7aac5a --- /dev/null +++ b/src/Server/HTTP/exceptionCodeToHTTPStatus.h @@ -0,0 +1,11 @@ +#pragma once +#include + + +namespace DB +{ + +/// Converts Exception code to HTTP status code. +Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code); + +} diff --git a/src/Server/HTTP/sendExceptionToHTTPClient.cpp b/src/Server/HTTP/sendExceptionToHTTPClient.cpp new file mode 100644 index 00000000000..78650758e35 --- /dev/null +++ b/src/Server/HTTP/sendExceptionToHTTPClient.cpp @@ -0,0 +1,80 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int HTTP_LENGTH_REQUIRED; + extern const int REQUIRED_PASSWORD; +} + + +void sendExceptionToHTTPClient( + const String & exception_message, + int exception_code, + HTTPServerRequest & request, + HTTPServerResponse & response, + WriteBufferFromHTTPServerResponse * out, + LoggerPtr log) +{ + setHTTPResponseStatusAndHeadersForException(exception_code, request, response, out, log); + + if (!out) + { + /// If nothing was sent yet. 
+ WriteBufferFromHTTPServerResponse out_for_message{response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT}; + + out_for_message.writeln(exception_message); + out_for_message.finalize(); + } + else + { + /// If buffer has data, and that data wasn't sent yet, then no need to send that data + bool data_sent = (out->count() != out->offset()); + + if (!data_sent) + out->position() = out->buffer().begin(); + + out->writeln(exception_message); + out->finalize(); + } +} + + +void setHTTPResponseStatusAndHeadersForException( + int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log) +{ + if (out) + out->setExceptionCode(exception_code); + else + response.set("X-ClickHouse-Exception-Code", toString(exception_code)); + + /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body + /// to avoid reading part of the current request body in the next request. + if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() + && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) + { + try + { + if (!request.getStream().eof()) + request.getStream().ignoreAll(); + } + catch (...) 
+ { + tryLogCurrentException(log, "Cannot read remaining request body during exception handling"); + response.setKeepAlive(false); + } + } + + if (exception_code == ErrorCodes::REQUIRED_PASSWORD) + response.requireAuthentication("ClickHouse server HTTP API"); + else + response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); +} +} diff --git a/src/Server/HTTP/sendExceptionToHTTPClient.h b/src/Server/HTTP/sendExceptionToHTTPClient.h new file mode 100644 index 00000000000..31fda88d900 --- /dev/null +++ b/src/Server/HTTP/sendExceptionToHTTPClient.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class HTTPServerRequest; +class HTTPServerResponse; +class WriteBufferFromHTTPServerResponse; + +/// Sends an exception to HTTP client. This function doesn't handle its own exceptions so it needs to be wrapped in try-catch. +/// Argument `out` may be either created from `response` or be nullptr (if it wasn't created before the exception). +void sendExceptionToHTTPClient( + const String & exception_message, + int exception_code, + HTTPServerRequest & request, + HTTPServerResponse & response, + WriteBufferFromHTTPServerResponse * out, + LoggerPtr log); + +/// Sets "X-ClickHouse-Exception-Code" header and the correspondent HTTP status in the response for an exception. +/// This is a part of what sendExceptionToHTTPClient() does. 
+void setHTTPResponseStatusAndHeadersForException( + int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log); +} diff --git a/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp new file mode 100644 index 00000000000..d42bd77e339 --- /dev/null +++ b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.cpp @@ -0,0 +1,24 @@ +#include + +#include +#include + + +namespace DB +{ + +void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method) +{ + /// Anything else beside HTTP POST should be readonly queries. + if (http_method != HTTPServerRequest::HTTP_POST) + { + /// 'readonly' setting values mean: + /// readonly = 0 - any query is allowed, client can change any setting. + /// readonly = 1 - only readonly queries are allowed, client can't change settings. + /// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'. + if (context->getSettingsRef().readonly == 0) + context->setSetting("readonly", 2); + } +} + +} diff --git a/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h new file mode 100644 index 00000000000..c46f2032d82 --- /dev/null +++ b/src/Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h @@ -0,0 +1,12 @@ +#pragma once + +#include + + +namespace DB +{ + +/// Sets readonly = 2 if the current HTTP method is not HTTP POST and if readonly is not set already. 
+void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method); + +} diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index a00f6fb5412..370af79e456 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1,11 +1,6 @@ #include -#include #include -#include -#include -#include -#include #include #include #include @@ -37,20 +32,14 @@ #include #include #include +#include +#include +#include #include -#include -#include "config.h" - -#include -#include -#include #include -#include -#include -#include -#include -#include + +#include "config.h" #include #include @@ -60,78 +49,19 @@ #include #include -#if USE_SSL -#include -#endif - namespace DB { namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_PARSE_TEXT; - extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; - extern const int CANNOT_PARSE_QUOTED_STRING; - extern const int CANNOT_PARSE_DATE; - extern const int CANNOT_PARSE_DATETIME; - extern const int CANNOT_PARSE_NUMBER; - extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; - extern const int CANNOT_PARSE_IPV4; - extern const int CANNOT_PARSE_IPV6; - extern const int CANNOT_PARSE_UUID; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_SCHEDULE_TASK; - extern const int DUPLICATE_COLUMN; - extern const int ILLEGAL_COLUMN; - extern const int THERE_IS_NO_COLUMN; - extern const int UNKNOWN_ELEMENT_IN_AST; - extern const int UNKNOWN_TYPE_OF_AST_NODE; - extern const int TOO_DEEP_AST; - extern const int TOO_BIG_AST; - extern const int UNEXPECTED_AST_STRUCTURE; - extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; - extern const int SYNTAX_ERROR; - - extern const int INCORRECT_DATA; - extern const int TYPE_MISMATCH; - - extern const int UNKNOWN_TABLE; - extern const int UNKNOWN_FUNCTION; - extern const int UNKNOWN_IDENTIFIER; - extern 
const int UNKNOWN_TYPE; - extern const int UNKNOWN_STORAGE; - extern const int UNKNOWN_DATABASE; - extern const int UNKNOWN_SETTING; - extern const int UNKNOWN_DIRECTION_OF_SORTING; - extern const int UNKNOWN_AGGREGATE_FUNCTION; - extern const int UNKNOWN_FORMAT; - extern const int UNKNOWN_DATABASE_ENGINE; - extern const int UNKNOWN_TYPE_OF_QUERY; - extern const int UNKNOWN_ROLE; extern const int NO_ELEMENTS_IN_CONFIG; - extern const int QUERY_IS_TOO_LARGE; - - extern const int NOT_IMPLEMENTED; - extern const int SOCKET_TIMEOUT; - - extern const int UNKNOWN_USER; - extern const int WRONG_PASSWORD; - extern const int REQUIRED_PASSWORD; - extern const int AUTHENTICATION_FAILED; - extern const int SET_NON_GRANTED_ROLE; - extern const int INVALID_SESSION_TIMEOUT; extern const int HTTP_LENGTH_REQUIRED; - extern const int SUPPORT_IS_DISABLED; - - extern const int TIMEOUT_EXCEEDED; } namespace @@ -173,115 +103,6 @@ void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::Laye } } -static String base64Decode(const String & encoded) -{ - String decoded; - Poco::MemoryInputStream istr(encoded.data(), encoded.size()); - Poco::Base64Decoder decoder(istr); - Poco::StreamCopier::copyToString(decoder, decoded); - return decoded; -} - -static String base64Encode(const String & decoded) -{ - std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - ostr.exceptions(std::ios::failbit); - Poco::Base64Encoder encoder(ostr); - encoder.rdbuf()->setLineLength(0); - encoder << decoded; - encoder.close(); - return ostr.str(); -} - -static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code) -{ - using namespace Poco::Net; - - if (exception_code == ErrorCodes::REQUIRED_PASSWORD) - { - return HTTPResponse::HTTP_UNAUTHORIZED; - } - else if (exception_code == ErrorCodes::UNKNOWN_USER || - exception_code == ErrorCodes::WRONG_PASSWORD || - exception_code == ErrorCodes::AUTHENTICATION_FAILED || - exception_code == 
ErrorCodes::SET_NON_GRANTED_ROLE) - { - return HTTPResponse::HTTP_FORBIDDEN; - } - else if (exception_code == ErrorCodes::BAD_ARGUMENTS || - exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP || - exception_code == ErrorCodes::CANNOT_PARSE_TEXT || - exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE || - exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING || - exception_code == ErrorCodes::CANNOT_PARSE_DATE || - exception_code == ErrorCodes::CANNOT_PARSE_DATETIME || - exception_code == ErrorCodes::CANNOT_PARSE_NUMBER || - exception_code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING || - exception_code == ErrorCodes::CANNOT_PARSE_IPV4 || - exception_code == ErrorCodes::CANNOT_PARSE_IPV6 || - exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || - exception_code == ErrorCodes::CANNOT_PARSE_UUID || - exception_code == ErrorCodes::DUPLICATE_COLUMN || - exception_code == ErrorCodes::ILLEGAL_COLUMN || - exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST || - exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE || - exception_code == ErrorCodes::THERE_IS_NO_COLUMN || - exception_code == ErrorCodes::TOO_DEEP_AST || - exception_code == ErrorCodes::TOO_BIG_AST || - exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE || - exception_code == ErrorCodes::SYNTAX_ERROR || - exception_code == ErrorCodes::INCORRECT_DATA || - exception_code == ErrorCodes::TYPE_MISMATCH || - exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE) - { - return HTTPResponse::HTTP_BAD_REQUEST; - } - else if (exception_code == ErrorCodes::UNKNOWN_TABLE || - exception_code == ErrorCodes::UNKNOWN_FUNCTION || - exception_code == ErrorCodes::UNKNOWN_IDENTIFIER || - exception_code == ErrorCodes::UNKNOWN_TYPE || - exception_code == ErrorCodes::UNKNOWN_STORAGE || - exception_code == ErrorCodes::UNKNOWN_DATABASE || - exception_code == ErrorCodes::UNKNOWN_SETTING || - exception_code == ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING || - exception_code == 
ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION || - exception_code == ErrorCodes::UNKNOWN_FORMAT || - exception_code == ErrorCodes::UNKNOWN_DATABASE_ENGINE || - exception_code == ErrorCodes::UNKNOWN_TYPE_OF_QUERY || - exception_code == ErrorCodes::UNKNOWN_ROLE) - { - return HTTPResponse::HTTP_NOT_FOUND; - } - else if (exception_code == ErrorCodes::QUERY_IS_TOO_LARGE) - { - return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE; - } - else if (exception_code == ErrorCodes::NOT_IMPLEMENTED) - { - return HTTPResponse::HTTP_NOT_IMPLEMENTED; - } - else if (exception_code == ErrorCodes::SOCKET_TIMEOUT || - exception_code == ErrorCodes::CANNOT_OPEN_FILE) - { - return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; - } - else if (exception_code == ErrorCodes::HTTP_LENGTH_REQUIRED) - { - return HTTPResponse::HTTP_LENGTH_REQUIRED; - } - else if (exception_code == ErrorCodes::TIMEOUT_EXCEEDED) - { - return HTTPResponse::HTTP_REQUEST_TIMEOUT; - } - else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK) - { - return HTTPResponse::HTTP_SERVICE_UNAVAILABLE; - } - - return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR; -} - - static std::chrono::steady_clock::duration parseSessionTimeout( const Poco::Util::AbstractConfiguration & config, const HTMLForm & params) @@ -358,204 +179,9 @@ HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const HTTP HTTPHandler::~HTTPHandler() = default; -bool HTTPHandler::authenticateUser( - HTTPServerRequest & request, - HTMLForm & params, - HTTPServerResponse & response) +bool HTTPHandler::authenticateUser(HTTPServerRequest & request, HTMLForm & params, HTTPServerResponse & response) { - using namespace Poco::Net; - - /// The user and password can be passed by headers (similar to X-Auth-*), - /// which is used by load balancers to pass authentication information. 
- std::string user = request.get("X-ClickHouse-User", ""); - std::string password = request.get("X-ClickHouse-Key", ""); - std::string quota_key = request.get("X-ClickHouse-Quota", ""); - - /// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name - /// extracted from the SSL certificate used for this connection instead of checking password. - bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on"); - bool has_auth_headers = !user.empty() || !password.empty() || has_ssl_certificate_auth; - - /// User name and password can be passed using HTTP Basic auth or query parameters - /// (both methods are insecure). - bool has_http_credentials = request.hasCredentials(); - bool has_credentials_in_query_params = params.has("user") || params.has("password"); - - std::string spnego_challenge; - SSLCertificateSubjects certificate_subjects; - - if (has_auth_headers) - { - /// It is prohibited to mix different authorization schemes. - if (has_http_credentials) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use SSL certificate authentication and Authorization HTTP header simultaneously"); - if (has_credentials_in_query_params) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use SSL certificate authentication and authentication via parameters simultaneously simultaneously"); - - if (has_ssl_certificate_auth) - { -#if USE_SSL - if (!password.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use SSL certificate authentication and authentication via password simultaneously"); - - if (request.havePeerCertificate()) - certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate()); - - if (certificate_subjects.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: SSL certificate 
authentication requires nonempty certificate's Common Name or Subject Alternative Name"); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "SSL certificate authentication disabled because ClickHouse was built without SSL library"); -#endif - } - } - else if (has_http_credentials) - { - /// It is prohibited to mix different authorization schemes. - if (has_credentials_in_query_params) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, - "Invalid authentication: it is not allowed " - "to use Authorization HTTP header and authentication via parameters simultaneously"); - - std::string scheme; - std::string auth_info; - request.getCredentials(scheme, auth_info); - - if (Poco::icompare(scheme, "Basic") == 0) - { - HTTPBasicCredentials credentials(auth_info); - user = credentials.getUsername(); - password = credentials.getPassword(); - } - else if (Poco::icompare(scheme, "Negotiate") == 0) - { - spnego_challenge = auth_info; - - if (spnego_challenge.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); - } - else - { - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); - } - } - else - { - /// If the user name is not set we assume it's the 'default' user. 
- user = params.get("user", "default"); - password = params.get("password", ""); - } - - if (!certificate_subjects.empty()) - { - if (!request_credentials) - request_credentials = std::make_unique(user, std::move(certificate_subjects)); - - auto * certificate_credentials = dynamic_cast(request_credentials.get()); - if (!certificate_credentials) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme"); - } - else if (!spnego_challenge.empty()) - { - if (!request_credentials) - request_credentials = server.context()->makeGSSAcceptorContext(); - - auto * gss_acceptor_context = dynamic_cast(request_credentials.get()); - if (!gss_acceptor_context) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected"); - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunreachable-code" - const auto spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log)); -#pragma clang diagnostic pop - - if (!spnego_response.empty()) - response.set("WWW-Authenticate", "Negotiate " + spnego_response); - - if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady()) - { - if (spnego_response.empty()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure"); - - response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); - response.send(); - return false; - } - } - else // I.e., now using user name and password strings ("Basic"). 
- { - if (!request_credentials) - request_credentials = std::make_unique(); - - auto * basic_credentials = dynamic_cast(request_credentials.get()); - if (!basic_credentials) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme"); - - basic_credentials->setUserName(user); - basic_credentials->setPassword(password); - } - - if (params.has("quota_key")) - { - if (!quota_key.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Invalid authentication: it is not allowed " - "to use quota key as HTTP header and as parameter simultaneously"); - - quota_key = params.get("quota_key"); - } - - /// Set client info. It will be used for quota accounting parameters in 'setUser' method. - - session->setHTTPClientInfo(request); - session->setQuotaClientKey(quota_key); - - /// Extract the last entry from comma separated list of forwarded_for addresses. - /// Only the last proxy can be trusted (if any). - String forwarded_address = session->getClientInfo().getLastForwardedFor(); - try - { - if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false)) - session->authenticate(*request_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port())); - else - session->authenticate(*request_credentials, request.clientAddress()); - } - catch (const Authentication::Require & required_credentials) - { - request_credentials = std::make_unique(); - - if (required_credentials.getRealm().empty()) - response.set("WWW-Authenticate", "Basic"); - else - response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\""); - - response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); - response.send(); - return false; - } - catch (const Authentication::Require & required_credentials) - { - request_credentials = server.context()->makeGSSAcceptorContext(); - - if (required_credentials.getRealm().empty()) - response.set("WWW-Authenticate", 
"Negotiate"); - else - response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\""); - - response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); - response.send(); - return false; - } - - request_credentials.reset(); - return true; + return authenticateUserByHTTP(request, params, response, *session, request_credentials, server.context(), log); } @@ -727,10 +353,22 @@ void HTTPHandler::processQuery( std::unique_ptr in; - static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role", - "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session"}; + auto roles = params.getAll("role"); + if (!roles.empty()) + context->setCurrentRoles(roles); - Names reserved_param_suffixes; + std::string database = request.get("X-ClickHouse-Database", params.get("database", "")); + if (!database.empty()) + context->setCurrentDatabase(database); + + std::string default_format = request.get("X-ClickHouse-Format", params.get("default_format", "")); + if (!default_format.empty()) + context->setDefaultFormat(default_format); + + /// Anything else beside HTTP POST should be readonly queries. + setReadOnlyIfHTTPMethodIdempotent(context, request.getMethod()); + + bool has_external_data = startsWith(request.getContentType(), "multipart/form-data"); auto param_could_be_skipped = [&] (const String & name) { @@ -738,87 +376,36 @@ void HTTPHandler::processQuery( if (name.empty()) return true; + /// Some parameters (database, default_format, everything used in the code above) do not + /// belong to the Settings class. 
+ static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role", + "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session", + "database", "default_format"}; + if (reserved_param_names.contains(name)) return true; - for (const String & suffix : reserved_param_suffixes) + /// For external data we also want settings. + if (has_external_data) { - if (endsWith(name, suffix)) - return true; + /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. + /// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings. + static const Names reserved_param_suffixes = {"_format", "_types", "_structure"}; + for (const String & suffix : reserved_param_suffixes) + { + if (endsWith(name, suffix)) + return true; + } } return false; }; - auto roles = params.getAll("role"); - if (!roles.empty()) - { - const auto & access_control = context->getAccessControl(); - const auto & user = context->getUser(); - std::vector roles_ids(roles.size()); - for (size_t i = 0; i < roles.size(); i++) - { - auto role_id = access_control.getID(roles[i]); - if (user->granted_roles.isGranted(role_id)) - roles_ids[i] = role_id; - else - throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to set as a current", roles[i].get()); - } - context->setCurrentRoles(roles_ids); - } - /// Settings can be overridden in the query. - /// Some parameters (database, default_format, everything used in the code above) do not - /// belong to the Settings class. - - /// 'readonly' setting values mean: - /// readonly = 0 - any query is allowed, client can change any setting. - /// readonly = 1 - only readonly queries are allowed, client can't change settings. - /// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'. 
- - /// In theory if initially readonly = 0, the client can change any setting and then set readonly - /// to some other value. - const auto & settings = context->getSettingsRef(); - - /// Anything else beside HTTP POST should be readonly queries. - if (request.getMethod() != HTTPServerRequest::HTTP_POST) - { - if (settings.readonly == 0) - context->setSetting("readonly", 2); - } - - bool has_external_data = startsWith(request.getContentType(), "multipart/form-data"); - - if (has_external_data) - { - /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. - reserved_param_suffixes.reserve(3); - /// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings. - reserved_param_suffixes.emplace_back("_format"); - reserved_param_suffixes.emplace_back("_types"); - reserved_param_suffixes.emplace_back("_structure"); - } - - std::string database = request.get("X-ClickHouse-Database", ""); - std::string default_format = request.get("X-ClickHouse-Format", ""); - SettingsChanges settings_changes; for (const auto & [key, value] : params) { - if (key == "database") - { - if (database.empty()) - database = value; - } - else if (key == "default_format") - { - if (default_format.empty()) - default_format = value; - } - else if (param_could_be_skipped(key)) - { - } - else + if (!param_could_be_skipped(key)) { /// Other than query parameters are treated as settings. 
if (!customizeQueryParam(context, key, value)) @@ -826,15 +413,9 @@ void HTTPHandler::processQuery( } } - if (!database.empty()) - context->setCurrentDatabase(database); - - if (!default_format.empty()) - context->setDefaultFormat(default_format); - - /// For external data we also want settings context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); context->applySettingsChanges(settings_changes); + const auto & settings = context->getSettingsRef(); /// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. context->setCurrentQueryId(params.get("query_id", request.get("X-ClickHouse-Query-Id", ""))); @@ -936,7 +517,7 @@ void HTTPHandler::processQuery( { bool with_stacktrace = (params.getParsed("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true)); ExecutionStatus status = ExecutionStatus::fromCurrentException("", with_stacktrace); - formatExceptionForClient(status.code, request, response, used_output); + setHTTPResponseStatusAndHeadersForException(status.code, request, response, used_output.out_holder.get(), log); current_output_format.setException(status.message); current_output_format.finalize(); used_output.exception_is_written = true; @@ -970,7 +551,7 @@ void HTTPHandler::trySendExceptionToClient( const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) try { - formatExceptionForClient(exception_code, request, response, used_output); + setHTTPResponseStatusAndHeadersForException(exception_code, request, response, used_output.out_holder.get(), log); if (!used_output.out_holder && !used_output.exception_is_written) { @@ -1032,38 +613,6 @@ catch (...) 
used_output.cancel(); } -void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) -{ - if (used_output.out_holder) - used_output.out_holder->setExceptionCode(exception_code); - else - response.set("X-ClickHouse-Exception-Code", toString(exception_code)); - - /// FIXME: make sure that no one else is reading from the same stream at the moment. - - /// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body - /// to avoid reading part of the current request body in the next request. - if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive() - && exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED) - { - try - { - if (!request.getStream().eof()) - request.getStream().ignoreAll(); - } - catch (...) - { - tryLogCurrentException(log, "Cannot read remaining request body during exception handling"); - response.setKeepAlive(false); - } - } - - if (exception_code == ErrorCodes::REQUIRED_PASSWORD) - response.requireAuthentication("ClickHouse server HTTP API"); - else - response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code)); -} - void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { setThreadName("HTTPHandler"); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index c78c45826f0..6580b317f6e 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -173,12 +173,6 @@ private: HTTPServerResponse & response, Output & used_output); - void formatExceptionForClient( - int exception_code, - HTTPServerRequest & request, - HTTPServerResponse & response, - Output & used_output); - static void pushDelayedResults(Output & used_output); }; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fccea9e258e..ac1423f87c1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ 
-888,11 +888,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { - squashing.setHeader(state.block_for_insert.cloneEmpty()); - auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); - if (result_chunk) + squashing.header = state.block_for_insert; + auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + if (planned_chunk.hasChunkInfo()) { - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -901,13 +902,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - Chunk result_chunk = Squashing::squash(squashing.flush()); - if (!result_chunk) - { - return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); - } + auto planned_chunk = squashing.flush(); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 8791668cd89..e556bda2561 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -134,7 +134,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk & chunk) +void DistributedSink::consume(Chunk chunk) { if (is_first_chunk) { @@ -142,7 +142,7 @@ void 
DistributedSink::consume(Chunk & chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,13 +420,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp( - copy_query_ast, - job.local_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -721,13 +715,7 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp( - query_ast, - context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interp(query_ast, context, allow_materialized); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 5b7396f2c6f..a4c95633595 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 0f9bd8b6ff9..abd4b4ce23b 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,14 +740,7 @@ bool StorageFileLog::streamToViews() 
auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter( - insert, - new_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); - + InterpreterInsertQuery interpreter(insert, new_context, false, true, true); auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 809401bb279..f5c5d093ce1 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,13 +1099,7 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - kafka_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 9803fa0a160..792133ced64 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 82759e8a851..57a1ea302f9 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 
+21,6 @@ limitations under the License. */ #include #include #include -#include #include #include #include @@ -331,7 +330,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) +void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -408,21 +407,6 @@ void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Ch builder = interpreter.buildQueryPipeline(); } - builder.addSimpleTransform([&](const Block & cur_header) - { - return std::make_shared(chunk_infos.clone(), cur_header); - }); - - String live_view_id = live_view.getStorageID().hasUUID() ? toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(live_view_id, stream_header); - }); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header); - }); - builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 12d8e898347..91daac32c7b 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); + void writeBlock(const Block & block, ContextPtr context); void refresh(); diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index ff5214a5e51..bc8cb0ce69a 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,13 +377,7 @@ void 
RefreshTask::executeRefreshUnlocked(std::shared_ptr +#include namespace DB { @@ -71,9 +72,21 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( Columns IMergeTreeDataPartWriter::releaseIndexColumns() { - return Columns( - std::make_move_iterator(index_columns.begin()), - std::make_move_iterator(index_columns.end())); + /// The memory for index was allocated without thread memory tracker. + /// We need to deallocate it in shrinkToFit without memory tracker as well. + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + + Columns result; + result.reserve(index_columns.size()); + + for (auto & column : index_columns) + { + column->shrinkToFit(); + result.push_back(std::move(column)); + } + + index_columns.clear(); + return result; } SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 909a8a48bda..467a5c82141 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7077,6 +7077,20 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( /// with new analyzer, Planner make decision regarding parallel replicas usage, and so about processing stage on reading if (!query_context->getSettingsRef().allow_experimental_analyzer) { + const auto & settings = query_context->getSettingsRef(); + if (query_context->canUseParallelReplicasCustomKey()) + { + if (query_context->getClientInfo().distributed_depth > 0) + return QueryProcessingStage::FetchColumns; + + if (!supportsReplication() && !settings.parallel_replicas_for_non_replicated_merge_tree) + return QueryProcessingStage::Enum::FetchColumns; + + if (to_stage >= QueryProcessingStage::WithMergeableState + && query_context->canUseParallelReplicasCustomKeyForCluster(*query_context->getClusterForParallelReplicas())) + return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + } + if 
(query_context->getClientInfo().collaborate_with_initiator) return QueryProcessingStage::Enum::FetchColumns; @@ -7088,7 +7102,7 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( return QueryProcessingStage::Enum::WithMergeableState; /// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled - if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) + if (settings.parallel_replicas_for_non_replicated_merge_tree) return QueryProcessingStage::Enum::WithMergeableState; } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 21d046c76f2..52d12c9db7d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -154,7 +154,8 @@ void writeColumnSingleGranule( const SerializationPtr & serialization, ISerialization::OutputStreamGetter stream_getter, size_t from_row, - size_t number_of_rows) + size_t number_of_rows, + const MergeTreeWriterSettings & settings) { ISerialization::SerializeBinaryBulkStatePtr state; ISerialization::SerializeBinaryBulkSettings serialize_settings; @@ -162,6 +163,7 @@ void writeColumnSingleGranule( serialize_settings.getter = stream_getter; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX; serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); @@ -259,7 +261,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeColumnSingleGranule( block.getByName(name_and_type->name), 
getSerialization(name_and_type->name), - stream_getter, granule.start_row, granule.rows_to_write); + stream_getter, granule.start_row, granule.rows_to_write, settings); /// Each type always have at least one substream prev_stream->hashing_buf.next(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index a576720294f..5c9191dbb54 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -254,6 +254,12 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); } + + const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types; + index_serializations.reserve(primary_key_types.size()); + + for (const auto & type : primary_key_types) + index_serializations.push_back(type->getDefaultSerialization()); } } @@ -299,22 +305,33 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } + skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_index_accumulated_marks.push_back(0); } } +void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row) +{ + chassert(index_block.columns() == index_serializations.size()); + auto & index_stream = compress_primary_key ? 
*index_source_hashing_stream : *index_file_hashing_stream; + + for (size_t i = 0; i < index_block.columns(); ++i) + { + const auto & column = index_block.getByPosition(i).column; + + index_columns[i]->insertFrom(*column, row); + index_serializations[i]->serializeBinary(*column, row, index_stream, {}); + } +} + void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) { - size_t primary_columns_num = primary_index_block.columns(); + if (!metadata_snapshot->hasPrimaryKey()) + return; + if (index_columns.empty()) - { - index_types = primary_index_block.getDataTypes(); - index_columns.resize(primary_columns_num); - last_block_index_columns.resize(primary_columns_num); - for (size_t i = 0; i < primary_columns_num; ++i) - index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); - } + index_columns = primary_index_block.cloneEmptyColumns(); { /** While filling index (index_columns), disable memory tracker. @@ -328,22 +345,14 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. The index contains Primary Key value for each `index_granularity` row. for (const auto & granule : granules_to_write) { - if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) - { - for (size_t j = 0; j < primary_columns_num; ++j) - { - const auto & primary_column = primary_index_block.getByPosition(j); - index_columns[j]->insertFrom(*primary_column.column, granule.start_row); - primary_column.type->getDefaultSerialization()->serializeBinary( - *primary_column.column, granule.start_row, compress_primary_key ? 
*index_source_hashing_stream : *index_file_hashing_stream, {}); - } - } + if (granule.mark_on_start) + calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row); } } - /// store last index row to write final mark at the end of column - for (size_t j = 0; j < primary_columns_num; ++j) - last_block_index_columns[j] = primary_index_block.getByPosition(j).column; + /// Store block with last index row to write final mark at the end of column + if (with_final_mark) + last_index_block = primary_index_block; } void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) @@ -420,17 +429,11 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat if (index_file_hashing_stream) { - if (write_final_mark) + if (write_final_mark && last_index_block) { - for (size_t j = 0; j < index_columns.size(); ++j) - { - const auto & column = *last_block_index_columns[j]; - size_t last_row_number = column.size() - 1; - index_columns[j]->insertFrom(column, last_row_number); - index_types[j]->getDefaultSerialization()->serializeBinary( - column, last_row_number, compress_primary_key ? 
*index_source_hashing_stream : *index_file_hashing_stream, {}); - } - last_block_index_columns.clear(); + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1); + last_index_block.clear(); } if (compress_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index bdf0fdb7f32..8d84442981e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -173,10 +173,10 @@ protected: std::unique_ptr index_source_hashing_stream; bool compress_primary_key; - DataTypes index_types; - /// Index columns from the last block - /// It's written to index file in the `writeSuffixAndFinalizePart` method - Columns last_block_index_columns; + /// Last block with index columns. + /// It's written to index file in the `writeSuffixAndFinalizePart` method. + Block last_index_block; + Serializations index_serializations; bool data_written = false; @@ -193,6 +193,7 @@ private: void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; + void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row); struct ExecutionStatistics { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index a69d21de8e7..74ea89a8864 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -433,6 +433,7 @@ void MergeTreeDataPartWriterWide::writeColumn( if (inserted) { ISerialization::SerializeBinaryBulkSettings serialize_settings; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); 
serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } @@ -441,6 +442,7 @@ void MergeTreeDataPartWriterWide::writeColumn( serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; for (const auto & granule : granules) { @@ -630,6 +632,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; + serialize_settings.use_compact_variant_discriminators_serialization = settings.use_compact_variant_discriminators_serialization; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index a9125b4047e..04171656fcf 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -76,6 +76,7 @@ struct MergeTreeWriterSettings , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) + , use_compact_variant_discriminators_serialization(storage_settings->use_compact_variant_discriminators_serialization) { } @@ -98,6 +99,7 @@ struct 
MergeTreeWriterSettings size_t low_cardinality_max_dictionary_size; bool low_cardinality_use_single_dictionary_for_part; + bool use_compact_variant_discriminators_serialization; }; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index a6298aab3d9..78b67de1a7e 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -145,12 +145,8 @@ ChunkAndProgress MergeTreeSelectProcessor::read() ordered_columns.push_back(res.block.getByName(name).column); } - auto chunk = Chunk(ordered_columns, res.row_count); - if (add_part_level) - chunk.getChunkInfos().add(std::make_shared(task->getInfo().data_part->info.level)); - return ChunkAndProgress{ - .chunk = std::move(chunk), + .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared(task->getInfo().data_part->info.level) : nullptr), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 4f90f7131da..02f8d6f4f6a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -264,10 +264,7 @@ try ++it; } - auto result = Chunk(std::move(res_columns), rows_read); - if (add_part_level) - result.getChunkInfos().add(std::make_shared(data_part->info.level)); - return result; + return Chunk(std::move(res_columns), rows_read, add_part_level ? 
std::make_shared(data_part->info.level) : nullptr); } } else diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 1f8d6abebd2..c0afd781c7e 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,6 +43,7 @@ struct Settings; M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ + M(Bool, use_compact_variant_discriminators_serialization, true, "Use compact version of Variant discriminators serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. 
By default has the same value as `index_granularity`.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index d8cfce1ca99..05751e0fa6f 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,27 +1,14 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include - -#include +#include +#include +#include namespace ProfileEvents { extern const Event DuplicatedInsertedBlocks; } -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace DB { @@ -71,12 +58,12 @@ void MergeTreeSink::onCancel() { } -void MergeTreeSink::consume(Chunk & chunk) +void MergeTreeSink::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -89,18 +76,6 @@ void MergeTreeSink::consume(Chunk & chunk) size_t streams = 0; bool support_parallel_write = false; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - String block_dedup_token; - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -125,16 +100,22 @@ void MergeTreeSink::consume(Chunk & chunk) if (!temp_part.part) continue; - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + 
toString(hash_value.items[1])); - } - if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; + String block_dedup_token; + if (storage.getDeduplicationLog()) + { + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + } + size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -146,7 +127,6 @@ void MergeTreeSink::consume(Chunk & chunk) /// In case of too much columns/parts in block, flush explicitly. streams += temp_part.streams.size(); - if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -163,16 +143,11 @@ void MergeTreeSink::consume(Chunk & chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = block_dedup_token, + .block_dedup_token = std::move(block_dedup_token), .part_counters = std::move(part_counters), }); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); @@ -185,8 +160,6 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; - const Settings & settings = context->getSettingsRef(); - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -205,8 +178,7 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); - - if (settings.insert_deduplicate && deduplication_log) + if (deduplication_log) { const String block_id = 
part->getZeroLevelPartBlockID(partition.block_dedup_token); auto res = deduplication_log->addPart(block_id, part->info); diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 90976020d52..cf6715a3415 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; void onCancel() override; @@ -36,6 +36,7 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; + UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 164658c914e..9d696b70d9f 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -93,6 +93,7 @@ struct MergedBlockOutputStream::Finalizer::Impl void MergedBlockOutputStream::Finalizer::finish() { std::unique_ptr to_finish = std::move(impl); + impl.reset(); if (to_finish) to_finish->finish(); } @@ -130,7 +131,19 @@ MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) noexcept = default; MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) noexcept = default; MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr impl_) : impl(std::move(impl_)) {} -MergedBlockOutputStream::Finalizer::~Finalizer() = default; +MergedBlockOutputStream::Finalizer::~Finalizer() +{ + try + { + if (impl) + finish(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + void MergedBlockOutputStream::finalizePart( const MergeTreeMutableDataPartPtr & new_part, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3dbcb5e5bda..a552ee89aee 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1297,7 +1297,6 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; - Block projection_header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1315,12 +1314,14 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); + projection_squashes[i].header = block_to_squash; + Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); - if (squashed_chunk) + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1341,10 +1342,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto 
squashed_chunk = Squashing::squash(projection_squash_plan.flush()); - if (squashed_chunk) + auto planned_chunk = projection_squash_plan.flush(); + if (planned_chunk.hasChunkInfo()) { - auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index bbae054fbed..4b4f4c33e7d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,25 +1,21 @@ +#include +#include +#include +#include +#include #include "Common/Exception.h" #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include -#include -#include - +#include +#include +#include +#include #include -#include - namespace ProfileEvents { @@ -257,12 +253,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); const auto & settings = context->getSettingsRef(); @@ -288,25 +284,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) if constexpr (async_insert) { - const auto async_insert_info_ptr = chunk.getChunkInfos().get(); - if (async_insert_info_ptr) + const auto & chunk_info = 
chunk.getChunkInfo(); + if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - String block_dedup_token; - auto token_info = chunk.getChunkInfos().get(); - if (!token_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", - storage.getStorageID().getNameForLogs()); - - const bool need_to_define_dedup_token = !token_info->isDefined(); - - if (token_info->isDefined()) - block_dedup_token = token_info->getToken(); - auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -358,10 +342,23 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) } else { + if (deduplicate) { + String block_dedup_token; + /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. 
+ + const String & dedup_token = settings.insert_deduplication_token; + if (!dedup_token.empty()) + { + /// multiple blocks can be inserted within the same insert query + /// an ordinal number is added to dedup token to generate a distinctive block id for each block + block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); + ++chunk_dedup_seqnum; + } + block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -369,13 +366,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } - - if (need_to_define_dedup_token) - { - chassert(temp_part.part); - const auto hash_value = temp_part.part->getPartBlockIDHash(); - token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); - } } profile_events_scope.reset(); @@ -421,15 +411,17 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) )); } - if (need_to_define_dedup_token) - { - token_info->finishChunkHashes(); - } - finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); + /// If deduplicated data should not be inserted into MV, we need to set proper + /// value for `last_block_is_duplicate`, which is possible only after the part is committed. + /// Othervide we can delay commit. + /// TODO: we can also delay commit if there is no MVs. 
+ if (!settings.deduplicate_blocks_in_dependent_materialized_views) + finishDelayedChunk(zookeeper); + ++num_blocks_processed; } @@ -439,6 +431,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; + last_block_is_duplicate = false; + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -451,6 +445,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; + last_block_is_duplicate = last_block_is_duplicate || deduplicated; + /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); @@ -539,7 +535,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir] () + auto try_rollback_part_rename = [this, &part, &original_part_dir]() { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; @@ -1155,16 +1151,8 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - const auto & settings = context->getSettingsRef(); - - ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( - settings.insert_keeper_fault_injection_probability, - settings.insert_keeper_fault_injection_seed, - storage.getZooKeeper(), - "ReplicatedMergeTreeSink::onFinish", - log); - - finishDelayedChunk(zookeeper); + auto zookeeper = storage.getZooKeeper(); + finishDelayedChunk(std::make_shared(zookeeper)); } template diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h 
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 7d025361717..39623c20584 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -59,6 +59,16 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); + /// For proper deduplication in MaterializedViews + bool lastBlockIsDuplicate() const override + { + /// If MV is responsible for deduplication, block is not considered duplicating. + if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) + return false; + + return last_block_is_duplicate; + } + struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -116,6 +126,7 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; + bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 36899011e33..4fb81d69070 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk & chunk) +void MessageQueueSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index 4a9248c6c4d..b3c1e61734f 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk & chunk) 
override; + void consume(Chunk chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 8f0e2d76473..0b88a9e8929 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,13 +644,7 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - nats_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index d2bdd0af302..f2f6eac333c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk & chunk) +void StorageObjectStorageSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 6ab531bb21a..e0081193686 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return 
"StorageObjectStorageSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 14b828e7268..4388864434e 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -454,13 +454,7 @@ bool StorageObjectStorageQueue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter( - insert, - queue_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, queue_context, false, true, true); auto block_io = interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index ee2570756ed..09b009b26d8 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 +51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk & chunk) +void PartitionedSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk & chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(partition_index_to_chunk[partition_index]); + sink->consume(std::move(partition_index_to_chunk[partition_index])); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index fcd67556dc9..68edeb6fd73 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { 
return "PartitionedSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 44479bd01e2..ba3cc6f58d0 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,13 +697,7 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ true, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context, true); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index f632e553a0d..2bb1e2dde0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,13 +437,7 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, insert_context); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index f3d2aff68c8..e4b19992151 
100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,13 +1129,7 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter( - insert, - rabbitmq_context, - /* allow_materialized */ false, - /* no_squash */ true, - /* no_destination */ true, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 4b5188ca9f2..90792c59d38 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -218,7 +218,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) { std::vector chunks_to_write = squash(std::move(chunk_)); @@ -247,10 +247,7 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// If there is any data left, write it. 
if (!chunks.empty()) - { - Chunk empty; - consume(empty); - } + consume({}); } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 64190c8c86f..1f548e7813d 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -32,7 +32,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index 1f7f6939f40..c451cfd1bf5 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk & chunk) +void EmbeddedRocksDBSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 2e1e0c7b429..011322df829 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 3473166a080..b9d3e071b6c 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -313,8 +313,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - auto 
chunk = Chunk(block.getColumns(), block.rows()); - sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index b064fba223a..a3f6b6afc5d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -607,7 +607,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1020,13 +1020,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter( - insert, - insert_context, - allow_materialized, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 67586985ce8..2cf3ced2904 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -426,7 +426,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( query_info.cluster = cluster; - if (!local_context->canUseParallelReplicasCustomKey(*cluster)) + if (!local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) { if (nodes > 1 && settings.optimize_skip_unused_shards) { @@ -839,7 +839,9 @@ void StorageDistributed::read( SelectQueryInfo modified_query_info = query_info; - if (local_context->getSettingsRef().allow_experimental_analyzer) + const auto & settings = local_context->getSettingsRef(); + + if (settings.allow_experimental_analyzer) { StorageID remote_storage_id = StorageID::createEmpty(); if (!remote_table_function_ptr) @@ -864,7 +866,7 @@ void 
StorageDistributed::read( header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } - if (!local_context->getSettingsRef().allow_experimental_analyzer) + if (!settings.allow_experimental_analyzer) { modified_query_info.query = ClusterProxy::rewriteSelectQuery( local_context, modified_query_info.query, @@ -874,7 +876,7 @@ void StorageDistributed::read( /// Return directly (with correct header) if no shard to query. if (modified_query_info.getCluster()->getShardsInfo().empty()) { - if (local_context->getSettingsRef().allow_experimental_analyzer) + if (settings.allow_experimental_analyzer) return; Pipe pipe(std::make_shared(header)); @@ -893,27 +895,8 @@ void StorageDistributed::read( storage_snapshot, processed_stage); - const auto & settings = local_context->getSettingsRef(); - - ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (local_context->canUseParallelReplicasCustomKey(*modified_query_info.getCluster())) - { - if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *local_context)) - { - additional_shard_filter_generator = - [my_custom_key_ast = std::move(custom_key_ast), - column_description = this->getInMemoryMetadataPtr()->columns, - custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, - custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value, - custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value, - context = local_context, - replica_count = modified_query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr - { - return getCustomKeyFilterForParallelReplica( - replica_count, replica_num - 1, my_custom_key_ast, {custom_key_type, custom_key_range_lower, custom_key_range_upper}, column_description, context); - }; - } - } + auto shard_filter_generator = 
ClusterProxy::getShardFilterGeneratorForCustomKey( + *modified_query_info.getCluster(), local_context, getInMemoryMetadataPtr()->columns); ClusterProxy::executeQuery( query_plan, @@ -928,7 +911,7 @@ void StorageDistributed::read( sharding_key_expr, sharding_key_column_name, distributed_settings, - additional_shard_filter_generator, + shard_filter_generator, /* is_remote_function= */ static_cast(owned_cluster)); /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. @@ -1050,13 +1033,7 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter( - new_query, - query_context, - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(new_query, query_context); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 3fb397c7b81..7f39ff615f0 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1778,12 +1778,12 @@ public: String getName() const override { return "StorageFileSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onCancel() override diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index c80e799a92b..20f99070000 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -119,10 +119,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto rows = 
chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -1248,10 +1248,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - { - auto chunk = Chunk(block.getColumns(), block.rows()); - sink->consume(chunk); - } + sink->consume(Chunk{block.getColumns(), block.rows()}); sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 463694c63aa..de0324d7998 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -22,6 +21,7 @@ #include #include +#include "StorageLogSettings.h" #include #include #include @@ -341,7 +341,7 @@ public: } } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -398,9 +398,9 @@ private: }; -void LogSink::consume(Chunk & chunk) +void LogSink::consume(Chunk chunk) { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index b1bd7053c2e..f69c4adb552 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -63,7 +63,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 94d7a33d0dd..20ad064f1fc 100644 --- a/src/Storages/StorageMergeTree.cpp +++ 
b/src/Storages/StorageMergeTree.cpp @@ -1,51 +1,51 @@ -#include "StorageMergeTree.h" -#include "Core/QueryProcessingStage.h" -#include "Storages/MergeTree/IMergeTreeDataPart.h" +#include #include #include -#include -#include #include +#include #include -#include "Common/Exception.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include #include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -220,24 +220,50 @@ void StorageMergeTree::read( { ClusterProxy::executeQueryWithParallelReplicas( query_plan, getStorageID(), processed_stage, query_info.query, local_context, query_info.storage_limits); + return; } - else - { - const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() - && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree - && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); - if (auto plan = reader.read( - column_names, + if (local_context->canUseParallelReplicasCustomKey() && settings.parallel_replicas_for_non_replicated_merge_tree + && !settings.allow_experimental_analyzer && local_context->getClientInfo().distributed_depth == 0) + { + if (auto cluster = local_context->getClusterForParallelReplicas(); + local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) + { + auto modified_query_info = query_info; + modified_query_info.cluster = std::move(cluster); + 
ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan, + getStorageID(), + std::move(modified_query_info), + getInMemoryMetadataPtr()->getColumns(), storage_snapshot, - query_info, - local_context, - max_block_size, - num_streams, - nullptr, - enable_parallel_reading)) - query_plan = std::move(*plan); + processed_stage, + query_info.query, + local_context); + return; + } + else + LOG_WARNING( + log, + "Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has " + "multiple shards", + cluster->getName()); } + + const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower() + && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree + && (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower); + + if (auto plan = reader.read( + column_names, + storage_snapshot, + query_info, + local_context, + max_block_size, + num_streams, + nullptr, + enable_parallel_reading)) + query_plan = std::move(*plan); } std::optional StorageMergeTree::totalRows(const Settings &) const @@ -1551,6 +1577,12 @@ bool StorageMergeTree::optimize( { assertNotReadonly(); + if (deduplicate && getInMemoryMetadataPtr()->hasProjections()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. 
" + "User should drop all the projections manually before running the query", + getStorageID().getTableName()); + if (deduplicate) { if (deduplicate_by_columns.empty()) diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index e0818fafae9..62a2a048642 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -106,12 +107,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 2a8a7bd2ee7..da391909dff 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -151,9 +151,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index cdfeab62b58..a8713c61e4d 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -227,9 +227,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); if (!inserter) { if 
(on_conflict.empty()) diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 1a275320f43..83bb3c606c9 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk & chunk) +void RedisSink::consume(Chunk chunk) { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.getColumns()); + auto block = getHeader().cloneWithColumns(chunk.detachColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,8 +567,7 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - Chunk chunk(block.getColumns(), block.rows()); - sink->consume(chunk); + sink->consume(Chunk{block.getColumns(), block.rows()}); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a127384c03c..13b7cc582a9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5460,13 +5460,45 @@ void StorageReplicatedMergeTree::read( /// 2. Do not read parts that have not yet been written to the quorum of the replicas. /// For this you have to synchronously go to ZooKeeper. 
if (settings.select_sequential_consistency) + { readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); + return; + } /// reading step for parallel replicas with new analyzer is built in Planner, so don't do it here - else if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer) + if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer) + { readParallelReplicasImpl(query_plan, column_names, query_info, local_context, processed_stage); - else - readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); -} + return; + } + + if (local_context->canUseParallelReplicasCustomKey() && !settings.allow_experimental_analyzer + && local_context->getClientInfo().distributed_depth == 0) + { + if (auto cluster = local_context->getClusterForParallelReplicas(); + local_context->canUseParallelReplicasCustomKeyForCluster(*cluster)) + { + auto modified_query_info = query_info; + modified_query_info.cluster = std::move(cluster); + ClusterProxy::executeQueryWithParallelReplicasCustomKey( + query_plan, + getStorageID(), + std::move(modified_query_info), + getInMemoryMetadataPtr()->getColumns(), + storage_snapshot, + processed_stage, + query_info.query, + local_context); + return; + } + else + LOG_WARNING( + log, + "Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has " + "multiple shards", + cluster->getName()); + } + + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams); } void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( QueryPlan & query_plan, @@ -5746,6 +5778,12 @@ bool StorageReplicatedMergeTree::optimize( if (!is_leader) throw Exception(ErrorCodes::NOT_A_LEADER, "OPTIMIZE cannot be done on this replica because it is not a 
leader"); + if (deduplicate && getInMemoryMetadataPtr()->hasProjections()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + getStorageID().getTableName()); + if (cleanup) { if (!getSettings()->allow_experimental_replacing_merge_with_cleanup) diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 85417a2f2a4..179e4cee199 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 0d094c15880..5b7f9fc0ac2 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -44,7 +44,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onFinish() override; private: @@ -82,9 +82,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk & chunk) +void SetOrJoinSink::consume(Chunk chunk) { - Block block = getHeader().cloneWithColumns(chunk.getColumns()); + Block block = getHeader().cloneWithColumns(chunk.detachColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 9b6d9f041e1..8df87d6290f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -226,9 +226,9 @@ public: } } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); + 
block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void onFinish() override diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 90e05c44e31..895da028fc2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -565,12 +565,12 @@ StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk & chunk) +void StorageURLSink::consume(Chunk chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.getColumns())); + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); } void StorageURLSink::onCancel() diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 1804079e75f..fa7cc6eeeef 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk & chunk) override; + void consume(Chunk chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index c9c606de049..cb46cd19517 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -119,7 +119,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) : SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk & chunk) override + void consume(Chunk chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index e36247103c7..77e6ee9cb24 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ 
b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -305,7 +304,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -690,13 +689,7 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter( - insert, - getContext(), - /* allow_materialized */ false, - /* no_squash */ false, - /* no_destination */ false, - /* async_isnert */ false); + InterpreterInsertQuery interpreter(insert, getContext()); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1420,7 +1413,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) + StorageWindowView & window_view, const Block & block, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) @@ -1435,7 +1428,7 @@ void StorageWindowView::writeIntoWindowView( window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block)); + Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1480,10 +1473,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto 
filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header_) + pipe.addSimpleTransform([&](const Block & header) { return std::make_shared( - header_, std::make_shared(filter_expression), + header, std::make_shared(filter_expression), filter_function->getColumnName(), true); }); } @@ -1538,30 +1531,6 @@ void StorageWindowView::writeIntoWindowView( QueryProcessingStage::WithMergeableState); builder = select_block.buildQueryPipeline(); - - builder.addSimpleTransform([&](const Block & stream_header) - { - // Can't move chunk_infos here, that function could be called several times - return std::make_shared(chunk_infos.clone(), stream_header); - }); - - String window_view_id = window_view.getStorageID().hasUUID() ? toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted(); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(window_view_id, stream_header); - }); - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared(stream_header); - }); - -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Afrer tmp table before squashing", stream_header); - }); -#endif - builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1601,13 +1570,6 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Afrer WatermarkTransform", stream_header); - }); -#endif - auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); @@ -1624,16 +1586,9 @@ void 
StorageWindowView::writeIntoWindowView( auto convert_actions = std::make_shared( convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); + builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); } -#ifdef ABORT_ON_LOGICAL_ERROR - builder.addSimpleTransform([&](const Block & stream_header) - { - return std::make_shared("StorageWindowView: Before out", stream_header); - }); -#endif - builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 14ac65091d3..f79867df424 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index ed378169381..84ba92bba00 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -290,7 +290,7 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, size_t min_block_size_rows = mutable_context->getSettingsRef().min_external_table_block_size_rows; size_t min_block_size_bytes = mutable_context->getSettingsRef().min_external_table_block_size_bytes; - auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); + auto squashing = 
std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); builder->resize(1); builder->addTransform(std::move(squashing)); diff --git a/tests/ci/download_release_packages.py b/tests/ci/download_release_packages.py index 8f3a2190ae8..e8260cf68f1 100755 --- a/tests/ci/download_release_packages.py +++ b/tests/ci/download_release_packages.py @@ -13,26 +13,28 @@ from get_previous_release_tag import ( PACKAGES_DIR = Path("previous_release_package_folder") -def download_packages(release: ReleaseInfo, dest_path: Path = PACKAGES_DIR) -> None: +def download_packages( + release: ReleaseInfo, dest_path: Path = PACKAGES_DIR, debug: bool = False +) -> None: dest_path.mkdir(parents=True, exist_ok=True) logging.info("Will download %s", release) for pkg, url in release.assets.items(): - if not pkg.endswith("_amd64.deb") or "-dbg_" in pkg: + if not pkg.endswith("_amd64.deb") or (not debug and "-dbg_" in pkg): continue pkg_name = dest_path / pkg download_build_with_progress(url, pkg_name) -def download_last_release(dest_path: Path) -> None: +def download_last_release(dest_path: Path, debug: bool = False) -> None: current_release = get_previous_release(None) if current_release is None: raise DownloadException("The current release is not found") - download_packages(current_release, dest_path=dest_path) + download_packages(current_release, dest_path=dest_path, debug=debug) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) release = get_release_by_tag(input()) - download_packages(release) + download_packages(release, debug=True) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index ee459ce35a0..d8e5a7fa27f 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -253,7 +253,7 @@ def main(): packages_path.mkdir(parents=True, exist_ok=True) if validate_bugfix_check: - download_last_release(packages_path) + download_last_release(packages_path, debug=True) else: 
download_all_deb_packages(check_name, reports_path, packages_path) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 7c74f52b610..80ac1935d95 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -185,7 +185,7 @@ def main(): build_path.mkdir(parents=True, exist_ok=True) if validate_bugfix_check: - download_last_release(build_path) + download_last_release(build_path, debug=True) else: download_all_deb_packages(check_name, reports_path, build_path) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8486e3a885f..8e7002af889 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -34,10 +34,8 @@ import urllib.parse # for crc32 import zlib from argparse import ArgumentParser -from contextlib import closing from datetime import datetime, timedelta from errno import ESRCH -from queue import Full from subprocess import PIPE, Popen from time import sleep, time from typing import Dict, List, Optional, Set, Tuple, Union @@ -69,7 +67,7 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"] VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$" -TEST_MAX_RUN_TIME_IN_SECONDS = 120 +TEST_MAX_RUN_TIME_IN_SECONDS = 180 class SharedEngineReplacer: @@ -360,37 +358,12 @@ def clickhouse_execute_json( return rows -class Terminated(KeyboardInterrupt): - pass - - -def signal_handler(sig, frame): - raise Terminated(f"Terminated with {sig} signal") - - def stop_tests(): - global stop_tests_triggered_lock - global stop_tests_triggered - global restarted_tests - - with stop_tests_triggered_lock: - print("Stopping tests") - if not stop_tests_triggered.is_set(): - stop_tests_triggered.set() - - # materialize multiprocessing.Manager().list() object before - # sending SIGTERM since this object is a proxy, that requires - # communicating with manager thread, but after SIGTERM will be - # send, this thread will die, and you will get - # ConnectionRefusedError error for any access to 
"restarted_tests" - # variable. - restarted_tests = [*restarted_tests] - - # send signal to all processes in group to avoid hung check triggering - # (to avoid terminating clickhouse-test itself, the signal should be ignored) - signal.signal(signal.SIGTERM, signal.SIG_IGN) - os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) - signal.signal(signal.SIGTERM, signal.SIG_DFL) + # send signal to all processes in group to avoid hung check triggering + # (to avoid terminating clickhouse-test itself, the signal should be ignored) + signal.signal(signal.SIGTERM, signal.SIG_IGN) + os.killpg(os.getpgid(os.getpid()), signal.SIGTERM) + signal.signal(signal.SIGTERM, signal.SIG_DFL) def get_db_engine(args, database_name): @@ -2071,13 +2044,18 @@ class TestSuite: stop_time = None exit_code = None server_died = None -stop_tests_triggered_lock = None -stop_tests_triggered = None -queue = None multiprocessing_manager = None restarted_tests = None +class ServerDied(Exception): + pass + + +class GlobalTimeout(Exception): + pass + + def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): all_tests, num_tests, test_suite = all_tests_with_params global stop_time @@ -2132,24 +2110,19 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): print(f"\nRunning {about}{num_tests} {test_suite.suite} tests ({proc_name}).\n") while True: - if is_concurrent: - case = queue.get(timeout=args.timeout * 1.1) - if not case: - break + if all_tests: + case = all_tests.pop(0) else: - if all_tests: - case = all_tests.pop(0) - else: - break + break if server_died.is_set(): stop_tests() - break + raise ServerDied("Server died") if stop_time and time() > stop_time: print("\nStop tests run because global time limit is exceeded.\n") stop_tests() - break + raise GlobalTimeout("Stop tests run because global time limit is exceeded") test_case = TestCase(test_suite, case, args, is_concurrent) @@ -2192,7 +2165,6 @@ def run_tests_array(all_tests_with_params: 
Tuple[List[str], int, TestSuite]): failures_chain += 1 if test_result.reason == FailureReason.SERVER_DIED: server_died.set() - stop_tests() elif test_result.status == TestStatus.SKIPPED: skipped_total += 1 @@ -2203,7 +2175,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite]): if failures_chain >= args.max_failures_chain: stop_tests() - break + raise ServerDied("Max failures chain") if failures_total > 0: print( @@ -2400,7 +2372,7 @@ def extract_key(key: str) -> str: )[1] -def do_run_tests(jobs, test_suite: TestSuite, parallel): +def do_run_tests(jobs, test_suite: TestSuite): if jobs > 1 and len(test_suite.parallel_tests) > 0: print( "Found", @@ -2409,19 +2381,8 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): len(test_suite.sequential_tests), "sequential tests", ) - run_n, run_total = parallel.split("/") - run_n = float(run_n) - run_total = float(run_total) tests_n = len(test_suite.parallel_tests) - run_total = min(run_total, tests_n) - jobs = min(jobs, tests_n) - run_total = max(jobs, run_total) - - batch_size = max(1, len(test_suite.parallel_tests) // jobs) - parallel_tests_array = [] - for _ in range(jobs): - parallel_tests_array.append((None, batch_size, test_suite)) # If we don't do random shuffling then there will be always # nearly the same groups of test suites running concurrently. @@ -2434,25 +2395,21 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): # of failures will be nearly the same for all tests from the group. 
random.shuffle(test_suite.parallel_tests) + batch_size = max(1, len(test_suite.parallel_tests) // jobs) + parallel_tests_array = [] + for job in range(jobs): + range_ = job * batch_size, job * batch_size + batch_size + batch = test_suite.parallel_tests[range_[0] : range_[1]] + parallel_tests_array.append((batch, batch_size, test_suite)) + try: - with closing(multiprocessing.Pool(processes=jobs)) as pool: - pool.map_async(run_tests_array, parallel_tests_array) - - for suit in test_suite.parallel_tests: - queue.put(suit, timeout=args.timeout * 1.1) - - for _ in range(jobs): - queue.put(None, timeout=args.timeout * 1.1) - - queue.close() - except Full: - print( - "Couldn't put test to the queue within timeout. Server probably hung." - ) - print_stacktraces() - queue.close() - - pool.join() + with multiprocessing.Pool(processes=jobs) as pool: + future = pool.map_async(run_tests_array, parallel_tests_array) + future.wait() + finally: + pool.terminate() + pool.close() + pool.join() run_tests_array( (test_suite.sequential_tests, len(test_suite.sequential_tests), test_suite) @@ -2817,7 +2774,7 @@ def main(args): test_suite.cloud_skip_list = cloud_skip_list test_suite.private_skip_list = private_skip_list - total_tests_run += do_run_tests(args.jobs, test_suite, args.parallel) + total_tests_run += do_run_tests(args.jobs, test_suite) if server_died.is_set(): exit_code.value = 1 @@ -3284,9 +3241,6 @@ if __name__ == "__main__": stop_time = None exit_code = multiprocessing.Value("i", 0) server_died = multiprocessing.Event() - stop_tests_triggered_lock = multiprocessing.Lock() - stop_tests_triggered = multiprocessing.Event() - queue = multiprocessing.Queue(maxsize=1) multiprocessing_manager = multiprocessing.Manager() restarted_tests = multiprocessing_manager.list() @@ -3294,9 +3248,6 @@ if __name__ == "__main__": # infinite tests processes left # (new process group is required to avoid killing some parent processes) os.setpgid(0, 0) - signal.signal(signal.SIGTERM, 
signal_handler) - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGHUP, signal_handler) try: args = parse_args() diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 97d5da787cd..56ea5c8846a 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -9,7 +9,9 @@ def started_cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "disks_app_test", main_configs=["config.xml"], with_minio=True + "disks_app_test", + main_configs=["config.xml"], + with_minio=True, ) cluster.start() @@ -47,12 +49,18 @@ def test_disks_app_func_ld(started_cluster): source = cluster.instances["disks_app_test"] out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list-disks"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list-disks"] ) - disks = out.split("\n") + disks = list( + sorted( + map( + lambda x: x.split(":")[0], filter(lambda x: len(x) > 1, out.split("\n")) + ) + ) + ) - assert disks[0] == "default" and disks[1] == "test1" and disks[2] == "test2" + assert disks[:4] == ["default", "local", "test1", "test2"] def test_disks_app_func_ls(started_cluster): @@ -61,7 +69,15 @@ def test_disks_app_func_ls(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -75,9 +91,8 @@ def test_disks_app_func_ls(started_cluster): "--save-logs", "--disk", "test1", - "list", - ".", - "--recursive", + "--query", + "list . 
--recursive", ] ) @@ -102,8 +117,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "write", - "path1", + "--query", + "'write path1'", ] ), ] @@ -113,18 +128,21 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "copy", - "--disk-from", - "test1", - "--disk-to", - "test2", - ".", - ".", + "--query", + "copy --recursive --disk-from test1 --disk-to test2 . .", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" in out @@ -136,8 +154,8 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path1", + "--query", + "remove path1", ] ) @@ -148,21 +166,37 @@ def test_disks_app_func_cp(started_cluster): "--save-logs", "--disk", "test1", - "remove", - "path1", + "--query", + "remove path1", ] ) # alesapin: Why we need list one more time? 
# kssenii: it is an assertion that the file is indeed deleted out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path1" not in out out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) assert "path1" not in out @@ -177,14 +211,13 @@ def test_disks_app_func_ln(started_cluster): [ "/usr/bin/clickhouse", "disks", - "link", - "data/default/test_table", - "data/default/z_tester", + "--query", + "link data/default/test_table data/default/z_tester", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "list", "data/default/"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--query", "list data/default/"] ) files = out.split("\n") @@ -209,15 +242,23 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "write", - "path3", + "--query", + "'write path3'", ] ), ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" in out @@ -229,13 +270,21 @@ def test_disks_app_func_rm(started_cluster): "--save-logs", "--disk", "test2", - "remove", - "path3", + "--query", + "remove path3", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test2", + "--query", + "list .", + ] ) assert "path3" not in out @@ -247,7 +296,15 @@ def test_disks_app_func_mv(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", 
"--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -260,14 +317,21 @@ def test_disks_app_func_mv(started_cluster): "disks", "--disk", "test1", - "move", - "store", - "old_store", + "--query", + "move store old_store", ] ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test1", + "--query", + "list .", + ] ) files = out.split("\n") @@ -290,8 +354,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "write", - "5.txt", + "--query", + "'write 5.txt'", ] ), ] @@ -304,8 +368,8 @@ def test_disks_app_func_read_write(started_cluster): "--save-logs", "--disk", "test1", - "read", - "5.txt", + "--query", + "read 5.txt", ] ) @@ -319,7 +383,15 @@ def test_remote_disk_list(started_cluster): init_data_s3(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test3", "list", "."] + [ + "/usr/bin/clickhouse", + "disks", + "--save-logs", + "--disk", + "test3", + "--query", + "list .", + ] ) files = out.split("\n") @@ -333,9 +405,8 @@ def test_remote_disk_list(started_cluster): "--save-logs", "--disk", "test3", - "list", - ".", - "--recursive", + "--query", + "list . 
--recursive", ] ) diff --git a/tests/integration/test_disks_app_interactive/__init__.py b/tests/integration/test_disks_app_interactive/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_disks_app_interactive/configs/config.xml b/tests/integration/test_disks_app_interactive/configs/config.xml new file mode 100644 index 00000000000..bcbb107f0a2 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/configs/config.xml @@ -0,0 +1,3 @@ + + /var/lib/clickhouse/ + \ No newline at end of file diff --git a/tests/integration/test_disks_app_interactive/test.py b/tests/integration/test_disks_app_interactive/test.py new file mode 100644 index 00000000000..ca4ba5d9065 --- /dev/null +++ b/tests/integration/test_disks_app_interactive/test.py @@ -0,0 +1,331 @@ +from helpers.cluster import ClickHouseCluster + +import pytest + +import pathlib + +import subprocess +import select +import io +from typing import List, Tuple, Dict, Union, Optional + +import os + + +class ClickHouseDisksException(Exception): + pass + + +@pytest.fixture(scope="module") +def started_cluster(): + global cluster + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "disks_app_test", + main_configs=["server_configs/config.xml"], + with_minio=True, + ) + + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +class DisksClient(object): + SEPARATOR = b"\a\a\a\a\n" + local_client: Optional["DisksClient"] = None # static variable + default_disk_root_directory: str = "/var/lib/clickhouse" + + def __init__(self, bin_path: str, config_path: str, working_path: str): + self.bin_path = bin_path + self.working_path = working_path + + self.proc = subprocess.Popen( + [bin_path, "disks", "--test-mode", "--config", config_path], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + self.poller = select.epoll() + self.poller.register(self.proc.stdout) + self.poller.register(self.proc.stderr) + + 
self.stopped = False + + self._fd_nums = { + self.proc.stdout.fileno(): self.proc.stdout, + self.proc.stderr.fileno(): self.proc.stderr, + } + + def execute_query(self, query: str, timeout: float = 5.0) -> str: + output = io.BytesIO() + + self.proc.stdin.write(query.encode() + b"\n") + self.proc.stdin.flush() + + events = self.poller.poll(timeout) + if not events: + raise TimeoutError(f"Disks client returned no output") + + for fd_num, event in events: + if event & (select.EPOLLIN | select.EPOLLPRI): + file = self._fd_nums[fd_num] + + if file == self.proc.stdout: + while True: + chunk = file.readline() + if chunk.endswith(self.SEPARATOR): + break + + output.write(chunk) + + elif file == self.proc.stderr: + error_line = self.proc.stderr.readline() + print(error_line) + raise ClickHouseDisksException(error_line.strip().decode()) + + else: + raise ValueError(f"Failed to read from pipe. Flag {event}") + + data = output.getvalue().strip().decode() + return data + + def list_disks(self) -> List[Tuple[str, str]]: + output = self.execute_query("list-disks") + return list( + sorted( + map( + lambda x: (x.split(":")[0], ":".join(x.split(":")[1:])), + output.split("\n"), + ) + ) + ) + + def current_disk_with_path(self) -> Tuple[str, str]: + output = self.execute_query("current_disk_with_path") + disk_line = output.split("\n")[0] + path_line = output.split("\n")[1] + assert disk_line.startswith("Disk: ") + assert path_line.startswith("Path: ") + return disk_line[6:], path_line[6:] + + def ls( + self, path: str, recursive: bool = False, show_hidden: bool = False + ) -> Union[List[str], Dict[str, List[str]]]: + recursive_adding = "--recursive " if recursive else "" + show_hidden_adding = "--all " if show_hidden else "" + output = self.execute_query( + f"list {path} {recursive_adding} {show_hidden_adding}" + ) + if recursive: + answer: Dict[str, List[str]] = dict() + blocks = output.split("\n\n") + for block in blocks: + directory = block.split("\n")[0][:-1] + files = 
block.split("\n")[1:] + answer[directory] = files + return answer + else: + return output.split("\n") + + def switch_disk(self, disk: str, directory: Optional[str] = None): + directory_addition = f"--path {directory} " if directory is not None else "" + self.execute_query(f"switch-disk {disk} {directory_addition}") + + def cd(self, directory: str, disk: Optional[str] = None): + disk_addition = f"--disk {disk} " if disk is not None else "" + self.execute_query(f"cd {directory} {disk_addition}") + + def copy( + self, + path_from, + path_to, + disk_from: Optional[str] = None, + disk_to: Optional[str] = None, + recursive: bool = False, + ): + disk_from_option = f"--disk-from {disk_from} " if disk_from is not None else "" + disk_to_option = f"--disk-to {disk_to} " if disk_to is not None else "" + recursive_tag = "--recursive" if recursive else "" + + self.execute_query( + f"copy {recursive_tag} {path_from} {path_to} {disk_from_option} {disk_to_option}" + ) + + def move(self, path_from: str, path_to: str): + self.execute_query(f"move {path_from} {path_to}") + + def rm(self, path: str, recursive: bool = False): + recursive_tag = "--recursive" if recursive else "" + self.execute_query(f"rm {recursive_tag} {path}") + + def mkdir(self, path: str, recursive: bool = False): + recursive_adding = "--recursive " if recursive else "" + self.execute_query(f"mkdir {path} {recursive_adding}") + + def ln(self, path_from: str, path_to: str): + self.execute_query(f"link {path_from} {path_to}") + + def read(self, path_from: str, path_to: Optional[str] = None): + path_to_adding = f"--path-to {path_to} " if path_to is not None else "" + output = self.execute_query(f"read {path_from} {path_to_adding}") + return output + + def write( + self, path_from: str, path_to: str + ): # Writing from stdin is difficult to test (do not know how to do this in python) + path_from_adding = f"--path-from {path_from}" + self.execute_query(f"write {path_from_adding} {path_to}") + + @staticmethod + def 
getLocalDisksClient(refresh: bool): + if (DisksClient.local_client is None) or refresh: + binary_file = os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH") + current_working_directory = str(pathlib.Path().resolve()) + config_file = f"{current_working_directory}/test_disks_app_interactive/configs/config.xml" + if not os.path.exists(DisksClient.default_disk_root_directory): + os.mkdir(DisksClient.default_disk_root_directory) + + DisksClient.local_client = DisksClient( + binary_file, config_file, current_working_directory + ) + return DisksClient.local_client + else: + return DisksClient.local_client + + +def test_disks_app_interactive_list_disks(): + client = DisksClient.getLocalDisksClient(True) + expected_disks_with_path = [ + ("default", "/"), + ("local", client.working_path), + ] + assert expected_disks_with_path == client.list_disks() + assert client.current_disk_with_path() == ("default", "/") + client.switch_disk("local") + assert client.current_disk_with_path() == ( + "local", + client.working_path, + ) + + +def test_disks_app_interactive_list_files_local(): + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("local") + excepted_listed_files = sorted(os.listdir("test_disks_app_interactive/")) + listed_files = sorted(client.ls("test_disks_app_interactive/")) + assert excepted_listed_files == listed_files + + +def test_disks_app_interactive_list_directories_default(): + client = DisksClient.getLocalDisksClient(True) + traversed_dir = client.ls(".", recursive=True) + client.mkdir("dir1") + client.mkdir("dir2") + client.mkdir(".dir3") + client.cd("dir1") + client.mkdir("dir11") + client.mkdir(".dir12") + client.mkdir("dir13") + client.cd("../dir2") + client.mkdir("dir21") + client.mkdir("dir22") + client.mkdir(".dir23") + client.cd("../.dir3") + client.mkdir("dir31") + client.mkdir(".dir32") + client.cd("..") + traversed_dir = client.ls(".", recursive=True) + assert traversed_dir == { + ".": ["dir1", "dir2"], + "./dir1": ["dir11", "dir13"], + 
"./dir2": ["dir21", "dir22"], + "./dir1/dir11": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1", "dir2"], + "./dir1": [".dir12", "dir11", "dir13"], + "./dir2": [".dir23", "dir21", "dir22"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./dir2/dir21": [], + "./dir2/dir22": [], + "./dir2/.dir23": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + client.rm("dir2", recursive=True) + traversed_dir = client.ls(".", recursive=True, show_hidden=True) + assert traversed_dir == { + ".": [".dir3", "dir1"], + "./dir1": [".dir12", "dir11", "dir13"], + "./.dir3": [".dir32", "dir31"], + "./dir1/dir11": [], + "./dir1/.dir12": [], + "./dir1/dir13": [], + "./.dir3/dir31": [], + "./.dir3/.dir32": [], + } + traversed_dir = client.ls(".", recursive=True, show_hidden=False) + assert traversed_dir == { + ".": ["dir1"], + "./dir1": ["dir11", "dir13"], + "./dir1/dir11": [], + "./dir1/dir13": [], + } + client.rm("dir1", recursive=True) + client.rm(".dir3", recursive=True) + assert client.ls(".", recursive=True, show_hidden=False) == {".": []} + + +def test_disks_app_interactive_cp_and_read(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt") + assert initial_text == read_text + client.mkdir("dir1") + client.copy("a.txt", "/dir1/b.txt", disk_from="local", disk_to="default") + read_text = client.read("a.txt", path_to="dir1/b.txt") + assert "" == read_text + read_text = client.read("/dir1/b.txt") + assert read_text == initial_text + with open(f"{DisksClient.default_disk_root_directory}/dir1/b.txt", "r") as file: + read_text = file.read() + assert read_text == 
initial_text + os.remove("a.txt") + client.rm("a.txt") + client.rm("/dir1", recursive=True) + + +def test_disks_app_interactive_test_move_and_write(): + initial_text = "File content" + with open("a.txt", "w") as file: + file.write(initial_text) + client = DisksClient.getLocalDisksClient(True) + client.switch_disk("default") + client.copy("a.txt", "/a.txt", disk_from="local", disk_to="default") + files = client.ls(".") + assert files == ["a.txt"] + client.move("a.txt", "b.txt") + files = client.ls(".") + assert files == ["b.txt"] + read_text = client.read("/b.txt") + assert read_text == initial_text + client.write("b.txt", "c.txt") + read_text = client.read("c.txt") + assert read_text == initial_text + os.remove("a.txt") diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py index 14c11bc8500..87b2c45bbc5 100644 --- a/tests/integration/test_force_deduplication/test.py +++ b/tests/integration/test_force_deduplication/test.py @@ -29,8 +29,6 @@ def get_counts(): def test_basic(start_cluster): - old_src, old_a, old_b, old_c = 0, 0, 0, 0 - node.query( """ CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple(); @@ -41,15 +39,6 @@ def test_basic(start_cluster): INSERT INTO test values(999); """ ) - - src, a, b, c = get_counts() - assert src == old_src + 1 - assert a == old_a + 2 - assert b == old_b + 2 - assert c == old_c + 2 - old_src, old_a, old_b, old_c = src, a, b, c - - # that issert fails on test_mv_b due to partitions by A with pytest.raises(QueryRuntimeException): node.query( """ @@ -57,51 +46,34 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); """ ) - src, a, b, c = get_counts() - assert src == old_src + 10 - assert a == old_a + 10 - assert b == old_b - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - # deduplication only for src table + old_src, old_a, old_b, old_c = get_counts() + # number 
of rows in test_mv_a and test_mv_c depends on order of inserts into views + assert old_src == 11 + assert old_a in (1, 11) + assert old_b == 1 + assert old_c in (1, 11) + node.query("INSERT INTO test SELECT number FROM numbers(10)") src, a, b, c = get_counts() - assert src == old_src - assert a == old_a + 10 - assert b == old_b + 10 - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - - # deduplication for MV tables does not work, because previous inserts have not written their deduplications tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`. - node.query( - """ - SET deduplicate_blocks_in_dependent_materialized_views = 1; - INSERT INTO test SELECT number FROM numbers(10); - """ - ) - src, a, b, c = get_counts() - assert src == old_src - assert a == old_a + 10 - assert b == old_b + 10 - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - - # deduplication for all the tables - node.query( - """ - SET deduplicate_blocks_in_dependent_materialized_views = 1; - INSERT INTO test SELECT number FROM numbers(10); - """ - ) - src, a, b, c = get_counts() + # no changes because of deduplication in source table assert src == old_src assert a == old_a assert b == old_b assert c == old_c - old_src, old_a, old_b, old_c = src, a, b, c - # that issert fails on test_mv_b due to partitions by A, it is an uniq data which is not deduplicated + node.query( + """ + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + src, a, b, c = get_counts() + assert src == 11 + assert a == old_a + 10 # first insert could be succesfull with disabled dedup + assert b == 11 + assert c == old_c + 10 + with pytest.raises(QueryRuntimeException): node.query( """ @@ -110,23 +82,16 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(100,10); """ ) - src, a, b, c = get_counts() - assert src == old_src + 10 - assert a == old_a + 10 - assert b == 
old_b - assert c == old_c + 10 - old_src, old_a, old_b, old_c = src, a, b, c - # deduplication for all tables, except test_mv_b. For test_mv_b it is an uniq data which is not deduplicated due to exception at previous insert node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; INSERT INTO test SELECT number FROM numbers(100,10); """ ) + src, a, b, c = get_counts() - assert src == old_src - assert a == old_a - assert b == old_b + 10 - assert c == old_c - old_src, old_a, old_b, old_c = src, a, b, c + assert src == 21 + assert a == old_a + 20 + assert b == 21 + assert c == old_c + 20 diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index 07a9e2badff..affa3f32cbe 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -5,7 +5,10 @@ cluster = ClickHouseCluster(__file__) nodes = [ cluster.add_instance( - f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True + f"n{i}", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, + macros={"replica": f"r{i}"}, ) for i in range(1, 5) ] @@ -20,34 +23,17 @@ def start_cluster(): cluster.shutdown() -def create_tables(cluster): - n1 = nodes[0] - n1.query("DROP TABLE IF EXISTS dist_table") - n1.query(f"DROP TABLE IF EXISTS test_table ON CLUSTER {cluster}") - - n1.query( - f"CREATE TABLE test_table ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" - ) - n1.query( - f""" - CREATE TABLE dist_table AS test_table - Engine=Distributed( - {cluster}, - currentDatabase(), - test_table, - rand() - ) - """ +def insert_data(table_name, row_num, all_nodes=False): + query = ( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers({row_num})" ) - -def insert_data(cluster, row_num): - create_tables(cluster) - n1 = nodes[0] - n1.query( - f"INSERT INTO dist_table 
SELECT number % 4, number FROM numbers({row_num})" - ) - n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") + if all_nodes: + for n in nodes: + n.query(query) + else: + n1 = nodes[0] + n1.query(query) @pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) @@ -56,12 +42,36 @@ def insert_data(cluster, row_num): "cluster", ["test_multiple_shards_multiple_replicas", "test_single_shard_multiple_replicas"], ) -def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter_type): +def test_parallel_replicas_custom_key_distributed( + start_cluster, cluster, custom_key, filter_type +): for node in nodes: node.rotate_logs() row_num = 1000 - insert_data(cluster, row_num) + + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS dist_table ON CLUSTER {cluster} SYNC") + n1.query(f"DROP TABLE IF EXISTS test_table_for_dist ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_dist ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" + ) + + n1.query( + f""" + CREATE TABLE dist_table AS test_table_for_dist + Engine=Distributed( + {cluster}, + currentDatabase(), + test_table_for_dist, + rand() + ) + """ + ) + + insert_data("dist_table", row_num) + + n1.query("SYSTEM FLUSH DISTRIBUTED dist_table") expected_result = "" for i in range(4): @@ -72,10 +82,10 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter n1.query( "SELECT key, count() FROM dist_table GROUP BY key ORDER BY key", settings={ - "prefer_localhost_replica": 0, "max_parallel_replicas": 4, "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, + "prefer_localhost_replica": 0, }, ) == expected_result @@ -87,3 +97,84 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter node.contains_in_log("Processing query on a replica using custom_key") for node in nodes ) + + +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) 
+@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize( + "cluster", + ["test_single_shard_multiple_replicas"], +) +def test_parallel_replicas_custom_key_mergetree( + start_cluster, cluster, custom_key, filter_type +): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS test_table_for_mt ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_mt ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))" + ) + + insert_data("test_table_for_mt", row_num, all_nodes=True) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t250\n" + + n1 = nodes[0] + assert ( + n1.query( + "SELECT key, count() FROM test_table_for_mt GROUP BY key ORDER BY key", + settings={ + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "parallel_replicas_for_non_replicated_merge_tree": 1, + "cluster_for_parallel_replicas": cluster, + }, + ) + == expected_result + ) + + +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize( + "cluster", + ["test_single_shard_multiple_replicas"], +) +def test_parallel_replicas_custom_key_replicatedmergetree( + start_cluster, cluster, custom_key, filter_type +): + for node in nodes: + node.rotate_logs() + + row_num = 1000 + n1 = nodes[0] + n1.query(f"DROP TABLE IF EXISTS test_table_for_rmt ON CLUSTER {cluster} SYNC") + n1.query( + f"CREATE TABLE test_table_for_rmt ON CLUSTER {cluster} (key UInt32, value String) Engine=ReplicatedMergeTree('/clickhouse/tables', '{{replica}}') ORDER BY (key, sipHash64(value))" + ) + + insert_data("test_table_for_rmt", row_num, all_nodes=False) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t250\n" + + n1 = nodes[0] + assert ( + n1.query( + "SELECT key, count() 
FROM test_table_for_rmt GROUP BY key ORDER BY key", + settings={ + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "cluster_for_parallel_replicas": cluster, + }, + ) + == expected_result + ) diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py index 3ba3ce092c3..f24a24f3238 100644 --- a/tests/integration/test_parallel_replicas_custom_key_failover/test.py +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -76,11 +76,11 @@ def test_parallel_replicas_custom_key_failover( f"SELECT key, count() FROM cluster('{cluster_name}', currentDatabase(), test_table) GROUP BY key ORDER BY key", settings={ "log_comment": log_comment, - "prefer_localhost_replica": prefer_localhost_replica, "max_parallel_replicas": 4, "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, "use_hedged_requests": use_hedged_requests, + "prefer_localhost_replica": prefer_localhost_replica, # avoid considering replica delay on connection choice # otherwise connection can be not distributed evenly among available nodes # and so custom key secondary queries (we check it bellow) @@ -100,20 +100,19 @@ def test_parallel_replicas_custom_key_failover( assert query_id != "" query_id = query_id[:-1] - if prefer_localhost_replica == 0: + assert ( + node1.query( + f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + ) + == "subqueries\t4\n" + ) + + # With enabled hedged requests, we can't guarantee exact query distribution among nodes + # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice + if use_hedged_requests == 0: assert ( 
node1.query( - f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" ) - == "subqueries\t4\n" + == "n1\t3\nn3\t2\n" ) - - # With enabled hedged requests, we can't guarantee exact query distribution among nodes - # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice - if use_hedged_requests == 0: - assert ( - node1.query( - f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" - ) - == "n1\t3\nn3\t2\n" - ) diff --git a/tests/integration/test_startup_scripts/__init__.py b/tests/integration/test_startup_scripts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_startup_scripts/configs/config.d/query_log.xml b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml new file mode 100644 index 00000000000..24d66fc674e --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/config.d/query_log.xml @@ -0,0 +1,8 @@ + + + system + query_log
+ toYYYYMM(event_date) + 1000 +
+
diff --git a/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml new file mode 100644 index 00000000000..e8a711a926a --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/config.d/startup_scripts.xml @@ -0,0 +1,17 @@ + + + + CREATE ROLE OR REPLACE testrole + + + GRANT CREATE USER, ALTER USER, DROP USER, SHOW USERS, SHOW CREATE USER ON *.* TO 'testrole' WITH GRANT OPTION; + + + CREATE TABLE TestTable (id UInt64) ENGINE=TinyLog + SELECT 1; + + + SELECT * FROM system.query_log LIMIT 1 + + + diff --git a/tests/integration/test_startup_scripts/configs/users.xml b/tests/integration/test_startup_scripts/configs/users.xml new file mode 100644 index 00000000000..f9917b034b2 --- /dev/null +++ b/tests/integration/test_startup_scripts/configs/users.xml @@ -0,0 +1,41 @@ + + + + + + + + 1 + + + + + + + + + + ::/0 + + + default + + default + + + + + + + + 3600 + + 0 + 0 + 0 + 0 + 0 + + + + diff --git a/tests/integration/test_startup_scripts/test.py b/tests/integration/test_startup_scripts/test.py new file mode 100644 index 00000000000..43a871a6fc5 --- /dev/null +++ b/tests/integration/test_startup_scripts/test.py @@ -0,0 +1,21 @@ +from helpers.cluster import ClickHouseCluster + + +def test_startup_scripts(): + cluster = ClickHouseCluster(__file__) + + node = cluster.add_instance( + "node", + main_configs=[ + "configs/config.d/query_log.xml", + "configs/config.d/startup_scripts.xml", + ], + with_zookeeper=False, + ) + + try: + cluster.start() + assert node.query("SHOW TABLES") == "TestTable\n" + + finally: + cluster.shutdown() diff --git a/tests/performance/replaceRegexp_fallback.xml b/tests/performance/replaceRegexp_fallback.xml new file mode 100644 index 00000000000..926e66c702f --- /dev/null +++ b/tests/performance/replaceRegexp_fallback.xml @@ -0,0 +1,12 @@ +> +> + + > + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember 
that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null + + > + > + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null + WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null + diff --git a/tests/queries/0_stateless/00027_argMinMax.sql b/tests/queries/0_stateless/00027_argMinMax.sql index dbf7c9176d2..57f815add27 100644 --- a/tests/queries/0_stateless/00027_argMinMax.sql +++ b/tests/queries/0_stateless/00027_argMinMax.sql @@ -13,4 +13,4 @@ FROM SELECT arrayJoin([[10, 4, 3], [7, 5, 6], [8, 8, 2]]) AS num, arrayJoin([[1, 2, 4]]) AS id -) +); diff --git a/tests/queries/0_stateless/00307_format_xml.sql b/tests/queries/0_stateless/00307_format_xml.sql index 7fdca83b69f..29c733bb186 100644 --- a/tests/queries/0_stateless/00307_format_xml.sql +++ b/tests/queries/0_stateless/00307_format_xml.sql @@ -1,2 +1,2 @@ SET output_format_write_statistics = 0; -SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1 FORMAT XML; +SELECT 'Hello & world' AS s, 'Hello\n', toDateTime('2001-02-03 04:05:06') AS time, arrayMap(x -> toString(x), range(10)) AS arr, (s, time) AS tpl SETTINGS extremes = 1, 
enable_named_columns_in_function_tuple = 0 FORMAT XML; diff --git a/tests/queries/0_stateless/00309_formats.sql b/tests/queries/0_stateless/00309_formats.sql index 87a1ea454d0..b0939c00a10 100644 --- a/tests/queries/0_stateless/00309_formats.sql +++ b/tests/queries/0_stateless/00309_formats.sql @@ -1,4 +1,6 @@ SET output_format_write_statistics = 0; +SET enable_named_columns_in_function_tuple = 0; + SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT RowBinary; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT RowBinaryWithNamesAndTypes; SELECT number * 246 + 10 AS n, toDate('2000-01-01') + n AS d, range(n) AS arr, arrayStringConcat(arrayMap(x -> reinterpretAsString(x), arr)) AS s, (n, d) AS tuple FROM system.numbers LIMIT 2 FORMAT TabSeparatedWithNamesAndTypes; diff --git a/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql b/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql index 7c939d060ea..14929045356 100644 --- a/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql +++ b/tests/queries/0_stateless/00504_mergetree_arrays_rw.sql @@ -1,5 +1,8 @@ set allow_deprecated_syntax_for_merge_tree=1; +set max_threads = 1; +set max_insert_threads = 1; + drop table if exists test_ins_arr; create table test_ins_arr (date Date, val Array(UInt64)) engine = MergeTree(date, (date), 8192); insert into test_ins_arr select toDate('2017-10-02'), [number, 42] from system.numbers limit 10000; diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index 9c9281dc7e4..adf6abb7298 100644 --- 
a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -1,7 +1,7 @@ 2 3 -3 +2 3 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index 51e6a513608..d3c4da86b41 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default +-- Implicit insert isn't deduplicated SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 8f7d19028b0..1fb219108da 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; 
+${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" diff --git a/tests/queries/0_stateless/01144_multiword_data_types.sql b/tests/queries/0_stateless/01144_multiword_data_types.sql index cc380f82d63..56def658ae0 100644 --- a/tests/queries/0_stateless/01144_multiword_data_types.sql +++ b/tests/queries/0_stateless/01144_multiword_data_types.sql @@ -23,7 +23,7 @@ CREATE TABLE multiword_types ( SHOW CREATE TABLE multiword_types; INSERT INTO multiword_types(a) VALUES (1); -SELECT toTypeName((*,)) FROM multiword_types; +SELECT toTypeName((*,)) FROM multiword_types SETTINGS enable_named_columns_in_function_tuple = 0; CREATE TABLE unsigned_types ( a TINYINT SIGNED, @@ -43,7 +43,7 @@ CREATE TABLE unsigned_types ( SHOW CREATE TABLE unsigned_types; INSERT INTO unsigned_types(a) VALUES (1); -SELECT toTypeName((*,)) FROM unsigned_types; +SELECT toTypeName((*,)) FROM unsigned_types SETTINGS enable_named_columns_in_function_tuple = 0; SELECT CAST('42' AS DOUBLE PRECISION), CAST(42, 'NATIONAL CHARACTER VARYING'), CAST(-1 AS tinyint UnSiGnEd), CAST(65535, ' sMaLlInT signed '); diff --git a/tests/queries/0_stateless/01232_untuple.reference b/tests/queries/0_stateless/01232_untuple.reference index 0358cde1354..3cd8eaa5611 100644 --- a/tests/queries/0_stateless/01232_untuple.reference +++ b/tests/queries/0_stateless/01232_untuple.reference @@ -2,7 +2,7 @@ hello 1 3 world 9 9 (0,1) -key tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'1\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'2\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'3\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'4\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'5\') +key tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v1\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v2\') 
tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v3\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v4\') tupleElement(argMax((v1, v2, v3, v4, v5), v1), \'v5\') 1 20 20 10 20 30 2 11 20 10 20 30 3 70 20 10 20 30 diff --git a/tests/queries/0_stateless/01232_untuple.sql b/tests/queries/0_stateless/01232_untuple.sql index ccefd13a772..391d08ab859 100644 --- a/tests/queries/0_stateless/01232_untuple.sql +++ b/tests/queries/0_stateless/01232_untuple.sql @@ -1,4 +1,5 @@ SET allow_experimental_analyzer = 1; +SET enable_named_columns_in_function_tuple = 1; select untuple((* except (b),)) from (select 1 a, 2 b, 3 c); select 'hello', untuple((* except (b),)), 'world' from (select 1 a, 2 b, 3 c); diff --git a/tests/queries/0_stateless/01246_buffer_flush.sh b/tests/queries/0_stateless/01246_buffer_flush.sh new file mode 100755 index 00000000000..1ca953c80d9 --- /dev/null +++ b/tests/queries/0_stateless/01246_buffer_flush.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function elapsed_sec() +{ + local expr=$1 && shift + local start end + start=$(date +%s.%N) + while ! 
eval "$expr"; do + sleep 0.5 + done + end=$(date +%s.%N) + $CLICKHOUSE_LOCAL -q "select floor($end-$start)" +} + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data_01256; + drop table if exists buffer_01256; + + create table data_01256 as system.numbers Engine=Memory(); +" + +echo "min" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 2, 100, /* time */ + 4, 100, /* rows */ + 1, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 5 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, min_time=2, flushed after $sec sec" +[[ $sec -lt 100 ]] || echo "Buffer flushed too late, max_time=100, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "max" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 2, /* time */ + 0, 100, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 5; + select count() from data_01256; +" +sec=$(elapsed_sec '[[ $($CLICKHOUSE_CLIENT -q "select count() from data_01256") -eq 10 ]]') +[[ $sec -ge 2 ]] || echo "Buffer flushed too early, max_time=2, flushed after $sec sec" +$CLICKHOUSE_CLIENT -q "select count() from data_01256" +$CLICKHOUSE_CLIENT -q "drop table buffer_01256" + +echo "direct" +$CLICKHOUSE_CLIENT -nm -q " + create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, + 100, 100, /* time */ + 0, 9, /* rows */ + 0, 1e6 /* bytes */ + ); + insert into buffer_01256 select * from system.numbers limit 10; + select count() from data_01256; +" + +echo "drop" +$CLICKHOUSE_CLIENT -nm -q " + insert into buffer_01256 select * from system.numbers limit 10; + drop table if 
exists buffer_01256; + select count() from data_01256; +" + +$CLICKHOUSE_CLIENT -q "drop table data_01256" diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql deleted file mode 100644 index 66f93371c29..00000000000 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ /dev/null @@ -1,50 +0,0 @@ --- Tags: no-fasttest - -SET function_sleep_max_microseconds_per_block = 4000000; - -drop table if exists data_01256; -drop table if exists buffer_01256; - -create table data_01256 as system.numbers Engine=Memory(); - -select 'min'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 5, 100, /* time */ - 4, 100, /* rows */ - 1, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- It is enough to ensure that the buffer will be flushed earlier then 2*min_time (10 sec) -select sleepEachRow(9) FORMAT Null SETTINGS function_sleep_max_microseconds_per_block=10e6; -select count() from data_01256; -drop table buffer_01256; - -select 'max'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 100, 2, /* time */ - 0, 100, /* rows */ - 0, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 5; -select count() from data_01256; --- sleep 2 (min time) + 1 (round up) + bias (1) = 4 -select sleepEachRow(2) from numbers(2) FORMAT Null; -select count() from data_01256; -drop table buffer_01256; - -select 'direct'; -create table buffer_01256 as system.numbers Engine=Buffer(currentDatabase(), data_01256, 1, - 100, 100, /* time */ - 0, 9, /* rows */ - 0, 1e6 /* bytes */ -); -insert into buffer_01256 select * from system.numbers limit 10; -select count() from data_01256; - -select 'drop'; -insert into buffer_01256 select * from system.numbers limit 10; -drop table if exists buffer_01256; -select count() from data_01256; - -drop table data_01256; diff 
--git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index dadf2f35e6e..a9801e3b910 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -137,7 +137,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and Settings['max_insert_threads'] = '16'; -18 +5 select count() from testX; 60 select count() from testXA; @@ -185,7 +185,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '16'; -18 +5 select count() from testX; 80 select count() from testXA; diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.reference b/tests/queries/0_stateless/01563_distributed_query_finish.reference index c3688b553c4..b48979a492e 100644 --- a/tests/queries/0_stateless/01563_distributed_query_finish.reference +++ b/tests/queries/0_stateless/01563_distributed_query_finish.reference @@ -1,2 +1 @@ -1,0 NETWORK_ERROR=0 diff --git a/tests/queries/0_stateless/01563_distributed_query_finish.sh b/tests/queries/0_stateless/01563_distributed_query_finish.sh index 0019c714e40..e3c5928f108 100755 --- a/tests/queries/0_stateless/01563_distributed_query_finish.sh +++ b/tests/queries/0_stateless/01563_distributed_query_finish.sh @@ -19,20 +19,25 @@ create table dist_01247 as data_01247 engine=Distributed(test_cluster_two_shards select * from dist_01247 format Null; EOL -network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'") +# NOTE: it is possible to got NETWORK_ERROR even with no-parallel, at least due to system.*_log_sender to the cloud +for ((i = 0; i < 100; ++i)); do + network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors 
WHERE name = 'NETWORK_ERROR'") -opts=( - "--max_distributed_connections=1" - "--optimize_skip_unused_shards=1" - "--optimize_distributed_group_by_sharding_key=1" - "--prefer_localhost_replica=0" -) -$CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm < filter is pushed down before CreatingSets CreatingSets Filter +Filter 1 3 > one condition of filter is pushed down before LEFT JOIN diff --git a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql index 1301135b4cb..2193fc7a8f4 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql +++ b/tests/queries/0_stateless/01655_plan_optimizations_merge_filters.sql @@ -1,3 +1,5 @@ +set query_plan_merge_filters=1; + set allow_experimental_analyzer=1; select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%'; diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index 6287156daaf..ba42795333c 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,7 +16,6 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS -SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.reference b/tests/queries/0_stateless/02010_array_index_bad_cast.reference index e69de29bb2d..e22493782f0 100644 --- 
a/tests/queries/0_stateless/02010_array_index_bad_cast.reference +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.reference @@ -0,0 +1,3 @@ +1 +0 +0 diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.sql b/tests/queries/0_stateless/02010_array_index_bad_cast.sql index 14162e0d2e2..590e60eb42e 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.sql +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.sql @@ -1,3 +1,4 @@ --- This query throws exception about uncomparable data types (but at least it does not introduce bad cast in code). SET allow_suspicious_low_cardinality_types=1; -SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); -- { serverError ILLEGAL_COLUMN } +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14 00:00:01'::DateTime64(7))); +SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize(NULL)); diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index 2d9f236ada9..e0cc8f0ce63 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data -18 18 18 18 
-deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 -18 36 27 36 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent +18 18 9 18 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent +18 9 9 9 +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent +18 18 9 18 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent 18 18 18 18 diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql index 465c8d6136c..fdd75b91b1f 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql @@ -1,6 +1,6 @@ -- Tags: long -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent'; drop table if exists test sync; drop table if exists test_mv_a sync; @@ -35,7 +35,7 @@ select (select sum(c) from test_mv_c where test='case1'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent'; set 
deduplicate_blocks_in_dependent_materialized_views=1; @@ -53,7 +53,7 @@ select (select sum(c) from test_mv_c where test='case2'); -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent'; set deduplicate_blocks_in_dependent_materialized_views=0; @@ -70,7 +70,7 @@ select (select sum(c) from test_mv_b where test='case3'), (select sum(c) from test_mv_c where test='case3'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent'; set deduplicate_blocks_in_dependent_materialized_views=1; diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql index ba50902ebea..d2d19b76a1f 100644 --- a/tests/queries/0_stateless/02125_query_views_log.sql +++ b/tests/queries/0_stateless/02125_query_views_log.sql @@ -8,7 +8,7 @@ create table dst (key Int) engine=Null(); create materialized view mv1 to dst as select * from src; create materialized view mv2 to dst as select * from src; -insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0; +insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1; system flush logs; -- { echo } diff --git a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql index 4406a05df0c..ecf4f9cab93 100644 --- a/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql +++ 
b/tests/queries/0_stateless/02294_nothing_arguments_in_functions.sql @@ -1,3 +1,5 @@ +set enable_named_columns_in_function_tuple = 0; + select arrayMap(x -> 2 * x, []); select toTypeName(arrayMap(x -> 2 * x, [])); select arrayMap((x, y) -> x + y, [], []); diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index a152066a460..8dd8910c858 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -876,7 +876,6 @@ tryBase58Decode tumble tumbleEnd tumbleStart -tuple tupleConcat tupleDivide tupleDivideByNumber diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference index bd3b689ca3c..d997b7ba830 100644 --- a/tests/queries/0_stateless/02447_drop_database_replica.reference +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -12,11 +12,21 @@ t 2 rdb_default 1 1 s1 r1 1 2 -2 -2 +skip inactive s1 r1 OK 2 0 s1 r2 QUEUED 2 0 s2 r1 QUEUED 2 0 +s1 r1 OK 2 0 +s1 r2 QUEUED 2 0 +s2 r1 QUEUED 2 0 +timeout on active +2 +2 +s1 r1 OK 3 0 +s1 r2 QUEUED 3 0 +s2 r1 QUEUED 3 0 +s9 r9 QUEUED 3 0 +drop replica 2 rdb_default 1 1 s1 r1 1 rdb_default 1 2 s1 r2 0 @@ -24,6 +34,9 @@ rdb_default 1 2 s1 r2 0 2 t t2 +t22 t3 +t33 t4 +t44 rdb_default_4 1 1 s1 r1 1 diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 1604d527f2b..93a5fcee8e2 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -33,10 +33,27 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, 
cannot drop it" # Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" -timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +echo 'skip inactive' +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=none_only_active -q "create table $db.t2 (n int) engine=Log" +timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" | sort timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort +# And that it still throws TIMEOUT_EXCEEDED for active replicas +echo 'timeout on active' +db9="${db}_9" +$CLICKHOUSE_CLIENT -q "create database $db9 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's9', 'r9')" +$CLICKHOUSE_CLIENT -q "detach database $db9" +$CLICKHOUSE_CLIENT -q "insert into system.zookeeper(name, path, value) values ('active', '/test/$CLICKHOUSE_DATABASE/rdb/replicas/s9|r9', '$($CLICKHOUSE_CLIENT -q "select serverUUID()")')" + +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=none_only_active -q "create table $db.t22 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t33 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED" +$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=5 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t44 (n int) 
engine=Log" | sort + +$CLICKHOUSE_CLIENT -q "attach database $db9" +$CLICKHOUSE_CLIENT -q "drop database $db9" + +echo 'drop replica' + $CLICKHOUSE_CLIENT -q "detach database $db3" $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db" $CLICKHOUSE_CLIENT -q "attach database $db3" 2>/dev/null diff --git a/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference b/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference new file mode 100644 index 00000000000..50e44edaecb --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_empty_tuple.reference @@ -0,0 +1,2 @@ +() 0 +() 0 diff --git a/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql b/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql new file mode 100644 index 00000000000..8e133143ef8 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_empty_tuple.sql @@ -0,0 +1,2 @@ +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; +SELECT tuple(), 0 FROM numbers(1) SETTINGS use_query_cache = true; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 4a4e898c5bd..77ef213b36d 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -332,12 +332,13 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) - Aggregating - Expression ((Before GROUP BY + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Filter ((WHERE + (Projection + Before ORDER BY))) + Filter (HAVING) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + 
Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference deleted file mode 100644 index 8d0f56ba185..00000000000 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.reference +++ /dev/null @@ -1,173 +0,0 @@ -query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='sipHash64(x)' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -Hello -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -Hello -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -Hello -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -Hello -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -Hello -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -Hello -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='y' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 
prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with 
custom_key='cityHash64(y) + 1' -filter_type='default' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=0 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='default' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=1 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=2 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -filter_type='range' max_replicas=3 prefer_localhost_replica=1 -0 334 -1 333 -2 333 -1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh deleted file mode 100755 index dccb680be42..00000000000 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-parallel, long - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -function run_with_custom_key { - echo "query='$1' with custom_key='$2'" - for prefer_localhost_replica in 0 1; do - for filter_type in 'default' 'range'; do - for max_replicas in {1..3}; do - echo "filter_type='$filter_type' max_replicas=$max_replicas prefer_localhost_replica=$prefer_localhost_replica" - query="$1 SETTINGS max_parallel_replicas=$max_replicas\ - , parallel_replicas_custom_key='$2'\ - , parallel_replicas_custom_key_filter_type='$filter_type'\ - , prefer_localhost_replica=$prefer_localhost_replica" - $CLICKHOUSE_CLIENT --query="$query" - done - done - done -} - -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key"; - -$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String) ENGINE = MergeTree ORDER BY x"; -$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key VALUES ('Hello')"; - -run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)" "sipHash64(x)" - -$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" - -$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)" -$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number % 3 FROM numbers(1000)" - -function run_count_with_custom_key { - run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y" "$1" -} - -run_count_with_custom_key "y" -run_count_with_custom_key "cityHash64(y)" -run_count_with_custom_key "cityHash64(y) + 1" - -$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" - -$CLICKHOUSE_CLIENT --query="DROP 
TABLE 02535_custom_key" diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference new file mode 100644 index 00000000000..1bb07f0d916 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.reference @@ -0,0 +1,177 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT * FROM 02535_custom_key_mt' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 
+filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 
333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh new file mode 100755 index 00000000000..fad43ea9070 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_mt.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ +, parallel_replicas_custom_key='$2'\ +, parallel_replicas_custom_key_filter_type='$filter_type'\ +, parallel_replicas_for_non_replicated_merge_tree=1 \ +, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'" + $CLICKHOUSE_CLIENT --query="$query" + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_mt"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String) ENGINE = MergeTree ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)" "sipHash64(x)" +run_with_custom_key "SELECT * FROM 02535_custom_key_mt" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt SELECT toString(number), number % 3 FROM numbers(1000)" + +function run_count_with_custom_key_distributed { + run_with_custom_key "SELECT y, 
count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_distributed "y" +run_count_with_custom_key_distributed "cityHash64(y)" +run_count_with_custom_key_distributed "cityHash64(y) + 1" + +function run_count_with_custom_key_merge_tree { + run_with_custom_key "SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_merge_tree "y" +run_count_with_custom_key_merge_tree "cityHash64(y)" +run_count_with_custom_key_merge_tree "cityHash64(y) + 1" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) as t1 JOIN 02535_custom_key_mt USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt" diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference new file mode 100644 index 00000000000..c6526b506d3 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.reference @@ -0,0 +1,177 @@ +query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 +Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT * FROM 02535_custom_key_rmt' with custom_key='sipHash64(x)' +filter_type='default' max_replicas=1 +Hello +filter_type='default' max_replicas=2 +Hello +filter_type='default' max_replicas=3 +Hello +filter_type='range' max_replicas=1 
+Hello +filter_type='range' max_replicas=2 +Hello +filter_type='range' max_replicas=3 +Hello +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='y' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' 
max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y)' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1' +filter_type='default' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='default' max_replicas=3 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=1 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=2 +0 334 +1 333 +2 333 +filter_type='range' max_replicas=3 +0 334 +1 333 +2 333 +1 diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh new file mode 100755 index 00000000000..6350f5027f9 --- /dev/null +++ b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key_rmt.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Tags: no-parallel, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function run_with_custom_key { + echo "query='$1' with custom_key='$2'" + for filter_type in 'default' 'range'; do + for max_replicas in {1..3}; do + echo "filter_type='$filter_type' max_replicas=$max_replicas" + query="$1 SETTINGS max_parallel_replicas=$max_replicas\ +, parallel_replicas_custom_key='$2'\ +, parallel_replicas_custom_key_filter_type='$filter_type'\ +, parallel_replicas_for_non_replicated_merge_tree=1 \ +, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'" + $CLICKHOUSE_CLIENT --query="$query" + done + done +} + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt (x String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535', 'r1') ORDER BY x"; +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt VALUES ('Hello')"; + +run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)" "sipHash64(x)" +run_with_custom_key "SELECT * FROM 02535_custom_key_rmt" "sipHash64(x)" + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt_hash"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt_hash (x String, y UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535_hash', 'r1') ORDER BY cityHash64(x)" +$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt_hash SELECT toString(number), number % 3 FROM numbers(1000)" + +function run_count_with_custom_key { + run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key "y" +run_count_with_custom_key "cityHash64(y)" +run_count_with_custom_key "cityHash64(y) + 1" + +function run_count_with_custom_key_merge_tree { + 
run_with_custom_key "SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y" "$1" +} + +run_count_with_custom_key_merge_tree "y" +run_count_with_custom_key_merge_tree "cityHash64(y)" +run_count_with_custom_key_merge_tree "cityHash64(y) + 1" + +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) as t1 JOIN 02535_custom_key_rmt_hash USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with" + +$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_rmt_hash" diff --git a/tests/queries/0_stateless/02541_tuple_element_with_null.sql b/tests/queries/0_stateless/02541_tuple_element_with_null.sql index d2062b60d49..e1581ce3755 100644 --- a/tests/queries/0_stateless/02541_tuple_element_with_null.sql +++ b/tests/queries/0_stateless/02541_tuple_element_with_null.sql @@ -9,7 +9,7 @@ SETTINGS index_granularity = 8192; INSERT INTO test_tuple_element VALUES (tuple(1,2)), (tuple(NULL, 3)); -SELECT +SELECT tupleElement(tuple, 'k1', 0) fine_k1_with_0, tupleElement(tuple, 'k1', NULL) k1_with_null, tupleElement(tuple, 'k2', 0) k2_with_0, diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 70bcd7f255b..9bb0c022752 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -29,16 +29,20 @@ WHERE type_1 = \'all\' ExpressionTransform × 2 (Filter) FilterTransform × 2 - (Aggregating) - ExpressionTransform × 2 - AggregatingTransform × 2 - Copy 1 → 2 - (Expression) - ExpressionTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform × 2 + 
(Filter) + FilterTransform × 2 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -64,10 +68,14 @@ ExpressionTransform × 2 ExpressionTransform × 2 AggregatingTransform × 2 Copy 1 → 2 - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Filter) + FilterTransform + (Filter) + FilterTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Aggregating) diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh index 2b9e5296a05..20b02bcba32 100755 --- a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -14,14 +14,11 @@ function run_test_for_disk() echo "$disk" - clickhouse-disks -C "$config" --disk "$disk" write --input "$config" $CLICKHOUSE_DATABASE/test - clickhouse-disks -C "$config" --log-level test --disk "$disk" copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --disk "$disk" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" + clickhouse-disks -C "$config" --log-level test --disk "$disk" --query "copy -r $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." 
} - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test - # NOTE: this is due to "copy" does works like "cp -R from to/" instead of "cp from to" - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk" --query "remove -r $CLICKHOUSE_DATABASE/test" } function run_test_copy_from_s3_to_s3(){ @@ -29,13 +26,12 @@ function run_test_copy_from_s3_to_s3(){ local disk_dest=$1 && shift echo "copy from $disk_src to $disk_dest" - clickhouse-disks -C "$config" --disk "$disk_src" write --input "$config" $CLICKHOUSE_DATABASE/test + clickhouse-disks -C "$config" --disk "$disk_src" --query "write --path-from $config $CLICKHOUSE_DATABASE/test" - clickhouse-disks -C "$config" --log-level test copy --disk-from "$disk_src" --disk-to "$disk_dest" $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& { + clickhouse-disks -C "$config" --log-level test --query "copy -r --disk-from $disk_src --disk-to $disk_dest $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy" |& { grep -o -e "Single part upload has completed." -e "Single operation copy has completed." 
} - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy/test - clickhouse-disks -C "$config" --disk "$disk_dest" remove $CLICKHOUSE_DATABASE/test.copy + clickhouse-disks -C "$config" --disk "$disk_dest" --query "remove -r $CLICKHOUSE_DATABASE/test.copy" } run_test_for_disk s3_plain_native_copy diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.sql b/tests/queries/0_stateless/02834_apache_arrow_abort.sql index 47db46f1e43..bd29e95db9a 100644 --- a/tests/queries/0_stateless/02834_apache_arrow_abort.sql +++ b/tests/queries/0_stateless/02834_apache_arrow_abort.sql @@ -1,4 +1,4 @@ -- Tags: no-fasttest -- This tests depends on internet access, but it does not matter, because it only has to check that there is no abort due to a bug in Apache Arrow library. -INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } +INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } diff --git a/tests/queries/0_stateless/02864_replace_regexp_string_fallback.reference b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.reference new file mode 100644 index 00000000000..dd52d49eea3 --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.reference @@ -0,0 +1 @@ +Hello l x Hexlo Hexxo diff --git a/tests/queries/0_stateless/02864_replace_regexp_string_fallback.sql b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.sql new file mode 100644 
index 00000000000..917c11fe8dd --- /dev/null +++ b/tests/queries/0_stateless/02864_replace_regexp_string_fallback.sql @@ -0,0 +1,11 @@ +-- Tests functions replaceRegexpAll and replaceRegexpOne with trivial patterns. These trigger internally a fallback to simple string replacement. + +-- _materialize_ because the shortcut is only implemented for non-const haystack + const needle + const replacement strings + +SELECT 'Hello' AS haystack, 'l' AS needle, 'x' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement), replaceRegexpAll(materialize(haystack), needle, replacement); + +-- negative tests + +-- Even if the fallback is used, invalid substitutions must throw an exception. +SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS } +SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpAll(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.reference b/tests/queries/0_stateless/02890_named_tuple_functions.reference new file mode 100644 index 00000000000..f7a0c440b5a --- /dev/null +++ b/tests/queries/0_stateless/02890_named_tuple_functions.reference @@ -0,0 +1,9 @@ +Tuple(\n i Int32,\n j Int32) +['i','j'] +Tuple(UInt8, Int32) +['1','2'] +Tuple(\n k UInt8,\n j Int32) +['k','j'] +Tuple(Int32, Int32, Int32, Int32) +['1','2','3','4'] +(1,2,3) diff --git a/tests/queries/0_stateless/02890_named_tuple_functions.sql b/tests/queries/0_stateless/02890_named_tuple_functions.sql new file mode 100644 index 00000000000..8e0c9c2b10e --- /dev/null +++ b/tests/queries/0_stateless/02890_named_tuple_functions.sql @@ -0,0 +1,31 @@ +set enable_named_columns_in_function_tuple = 1; +set allow_experimental_analyzer = 1; + +drop table if exists x; +create table x (i int, j int) engine MergeTree order by i; +insert into x values (1, 2); + +select 
toTypeName(tuple(i, j)) from x; +select tupleNames(tuple(i, j)) from x; + +select toTypeName(tuple(1, j)) from x; +select tupleNames(tuple(1, j)) from x; + +select toTypeName(tuple(1 as k, j)) from x; +select tupleNames(tuple(1 as k, j)) from x; + +select toTypeName(tuple(i, i, j, j)) from x; +select tupleNames(tuple(i, i, j, j)) from x; + +select tupleNames(1); -- { serverError 43 } + +drop table x; + +drop table if exists tbl; + +-- Make sure named tuple won't break Values insert +create table tbl (x Tuple(a Int32, b Int32, c Int32)) engine MergeTree order by (); +insert into tbl values (tuple(1, 2, 3)); -- without tuple it's interpreted differently inside values block. +select * from tbl; + +drop table tbl diff --git a/tests/queries/0_stateless/02890_untuple_column_names.reference b/tests/queries/0_stateless/02890_untuple_column_names.reference index 388f974c45f..13a85c70138 100644 --- a/tests/queries/0_stateless/02890_untuple_column_names.reference +++ b/tests/queries/0_stateless/02890_untuple_column_names.reference @@ -57,6 +57,10 @@ t.1: 1 Row 1: ────── t.1: 1 +-- tuple() with enable_named_columns_in_function_tuple = 1 and allow_experimental_analyzer = 1 keeps the column names +Row 1: +────── +t.a: 1 -- thankfully JSONExtract() keeps them Row 1: ────── diff --git a/tests/queries/0_stateless/02890_untuple_column_names.sql b/tests/queries/0_stateless/02890_untuple_column_names.sql index ab6748cb54d..cd490ca3522 100644 --- a/tests/queries/0_stateless/02890_untuple_column_names.sql +++ b/tests/queries/0_stateless/02890_untuple_column_names.sql @@ -37,8 +37,11 @@ SELECT untuple(tuple(1)::Tuple(Int)), untuple(tuple(1)::Tuple(Int)) FORMAT Verti SELECT untuple(tuple(1)::Tuple(Int)), untuple(tuple(1)::Tuple(Int)) FORMAT Vertical SETTINGS allow_experimental_analyzer = 1; -- Bug: doesn't throw an exception SELECT '-- tuple() loses the column names (would be good to fix, see #36773)'; -SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS 
allow_experimental_analyzer = 0; -SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 0, enable_named_columns_in_function_tuple = 0; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1, enable_named_columns_in_function_tuple = 0; + +SELECT '-- tuple() with enable_named_columns_in_function_tuple = 1 and allow_experimental_analyzer = 1 keeps the column names'; +SELECT untuple(tuple(1 as a)) as t FORMAT Vertical SETTINGS allow_experimental_analyzer = 1, enable_named_columns_in_function_tuple = 1; SELECT '-- thankfully JSONExtract() keeps them'; SELECT untuple(JSONExtract('{"key": "value"}', 'Tuple(key String)')) x FORMAT Vertical SETTINGS allow_experimental_analyzer = 0; diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 07deb7c2565..335b55f05c8 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,14 +10,13 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 -2023-09-01 12:00:00 42 +2022-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 84 +2022-09-01 12:00:00 42 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing (Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index a2378fd8f67..f206f0d7775 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ 
b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,9 +54,8 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 + - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded - Now it is fixed. */ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; @@ -98,7 +97,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables + - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables */ SET deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/02941_variant_type_1.reference b/tests/queries/0_stateless/02941_variant_type_1.reference index 8a6e77d4f6d..53e5a556821 100644 --- a/tests/queries/0_stateless/02941_variant_type_1.reference +++ b/tests/queries/0_stateless/02941_variant_type_1.reference @@ -91,42 +91,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -145,21 +145,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -256,42 +256,42 @@ lc_str_2 (0,0) (0,0) 
(0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -310,23 +310,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -421,42 +421,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -475,23 +475,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree compact @@ -587,42 +587,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -641,21 +641,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -751,42 +751,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -805,21 +805,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 
+0 +0 +0 +0 +0 1 2 3 @@ -916,42 +916,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -970,23 +970,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -1080,42 +1080,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1134,23 +1134,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -1245,42 +1245,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1299,23 +1299,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -1409,42 +1409,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 
+0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -1463,23 +1463,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- MergeTree wide @@ -1575,42 +1575,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1629,21 +1629,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1739,42 +1739,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 0 1 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1793,21 +1793,21 @@ lc_str_2 [0] [0,1] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 2 3 @@ -1904,42 +1904,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -1958,23 +1958,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test2 select @@ -2068,42 +2068,42 @@ lc_str_2 (0,0) (0,0) (0,0) -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N 0 -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 
2 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 -\N -\N -\N +0 +0 +0 [] [] [] @@ -2122,23 +2122,23 @@ lc_str_2 [0] [] [0,1,2] -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 -\N +0 3 ----------------------------------------------------------------------------------------------------------- test3 insert @@ -2233,42 +2233,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2287,23 +2287,23 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- test3 select @@ -2397,42 +2397,42 @@ lc_str_15 (0,0) (16,17) (0,0) -\N -\N -\N -\N +0 +0 +0 +0 4 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 10 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 16 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 5 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 11 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 17 -\N +0 [] [] [] @@ -2451,22 +2451,22 @@ lc_str_15 [] [] [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 6 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 12 -\N -\N -\N -\N -\N +0 +0 +0 +0 +0 18 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference index 20a5176cb5e..1d9126aa230 100644 --- a/tests/queries/0_stateless/02941_variant_type_2.reference +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -6,9 +6,6 @@ test4 select 100000 100000 100000 -100000 -100000 
-100000 MergeTree compact test4 insert test4 select @@ -17,18 +14,12 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 MergeTree wide test4 insert test4 select @@ -37,15 +28,9 @@ test4 select 100000 100000 100000 -100000 -100000 -100000 test4 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index f43cd2bb0d6..8453bce98dc 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -33,13 +33,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_3.reference b/tests/queries/0_stateless/02941_variant_type_3.reference index 1ccdb3acdff..d28aa7a594b 100644 --- a/tests/queries/0_stateless/02941_variant_type_3.reference +++ b/tests/queries/0_stateless/02941_variant_type_3.reference @@ -6,9 +6,6 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 MergeTree compact test5 insert test5 select @@ -17,18 +14,12 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 
100000 -100000 -100000 -100000 MergeTree wide test5 insert test5 select @@ -37,15 +28,9 @@ test5 select 100000 100000 100000 -100000 -100000 -100000 test5 select 500000 100000 100000 100000 100000 -100000 -100000 -100000 diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index f4b2b304f56..990eb25b5be 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -35,13 +35,10 @@ select v.\`LowCardinality(String)\` from test format Null; select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; -select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); -select v.\`Array(UInt64)\`.size0 from test format Null; -select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +select v.\`Array(UInt64)\`.size0 from test format Null;" } function run() diff --git a/tests/queries/0_stateless/02941_variant_type_4.reference b/tests/queries/0_stateless/02941_variant_type_4.reference index e13d5820343..d1630b04347 100644 --- a/tests/queries/0_stateless/02941_variant_type_4.reference +++ b/tests/queries/0_stateless/02941_variant_type_4.reference @@ -6,9 +6,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- MergeTree compact test6 insert @@ -18,9 +15,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 
select 1000000 @@ -28,9 +22,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- MergeTree wide test6 insert @@ -40,9 +31,6 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- test6 select 1000000 @@ -50,7 +38,4 @@ test6 select 200000 200000 200000 -200000 -200000 -200000 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index f9a16847864..b8f619694b0 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -29,13 +29,10 @@ function test6_select() select count() from test where isNotNull(v.\`LowCardinality(String)\`); select v.\`Tuple(a UInt32, b UInt32)\` from test format Null; select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a); select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null; - select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b); select v.\`Array(UInt64)\` from test format Null; select count() from test where not empty(v.\`Array(UInt64)\`); - select v.\`Array(UInt64)\`.size0 from test format Null; - select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" + select v.\`Array(UInt64)\`.size0 from test format Null;" echo "-----------------------------------------------------------------------------------------------------------" } diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference index 531163e1d84..3135f2d01e1 100644 --- 
a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference @@ -3,28 +3,28 @@ data after ATTACH 1 Files before DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_1_1_0 -backups/ordinary_default/data/ordinary_default/data/all_1_1_0: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh index 12d08159012..d543f7195a9 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh @@ -49,11 +49,11 @@ path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM path=${path%/} echo "Files before DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 $CLICKHOUSE_CLIENT -q "detach table data" echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 # metadata 
file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference index 1e191b719a5..a2dd196083e 100644 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference @@ -3,28 +3,28 @@ data after ATTACH 1 Files before DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json Files after DETACH TABLE all_X_X_X -backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: -primary.cidx -serialization.json -metadata_version.txt -default_compression_codec.txt +/backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +checksums.txt +columns.txt +count.txt data.bin data.cmrk3 -count.txt -columns.txt -checksums.txt +default_compression_codec.txt +metadata_version.txt +primary.cidx +serialization.json diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh index b079e67a000..eec05c81344 100755 --- a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -55,14 +55,14 @@ path=${path%/} echo "Files before DETACH TABLE" # sed to match any part, since in case of fault injection part name may not be all_0_0_0 but all_1_1_0 
-clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' $CLICKHOUSE_CLIENT -nm -q " detach table data_read; detach table data_write; " echo "Files after DETACH TABLE" -clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' +clickhouse-disks -C "$config" --disk s3_plain_disk --query "list --recursive $path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' # metadata file is left $CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference index 5bc329ae4eb..08f07a92815 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.reference +++ b/tests/queries/0_stateless/02993_lazy_index_loading.reference @@ -1,4 +1,4 @@ -100000000 140000000 +100000000 100000000 0 0 1 100000000 100000000 diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python deleted file mode 100644 index dd1058518c9..00000000000 --- a/tests/queries/0_stateless/03008_deduplication.python +++ /dev/null @@ -1,657 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import argparse -import string - - -CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, "helpers")) - - -def __format(template, **params): - field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] - kv_args = {} - for field in field_names: - if field in params: - kv_args[field] = params[field] - else: - kv_args[field] = "" - - return template.format(**kv_args) - - -def instance_create_statement( - table_name, - table_columns, - 
table_keys, - table_engine, - with_deduplication, - no_merges=True, -): - template = """ - CREATE TABLE {table_name} - {table_columns} - ENGINE = {table_engine} - ORDER BY {table_keys} - {table_settings}; - {table_no_merges} - """ - - params = dict() - params["table_name"] = table_name - params["table_columns"] = table_columns - params["table_keys"] = table_keys - params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" - params["table_engine"] = ( - "MergeTree()" - if table_engine == "MergeTree" - else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" - ) - - deduplication_window_setting_name = ( - "non_replicated_deduplication_window" - if table_engine == "MergeTree" - else "replicated_deduplication_window" - ) - deduplication_window_setting_value = 1000 if with_deduplication else 0 - - settings = list() - settings += [ - f"{deduplication_window_setting_name}={deduplication_window_setting_value}" - ] - params["table_settings"] = "SETTINGS " + ",".join(settings) - - return __format(template, **params) - - -def instance_insert_statement( - table_name, count, insert_method, insert_unique_blocks, use_insert_token -): - insert_settings = ( - "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" - ) - - if insert_method == "InsertSelect": - template = """ - INSERT INTO {table_name} - SELECT {insert_columns} - FROM numbers({count}) {insert_settings}; - """ - return __format( - template, - table_name=table_name, - count=count, - insert_columns="'src_4', 4" - if not insert_unique_blocks - else "'src_' || toString(number), number", - insert_settings=insert_settings, - ) - - else: - template = """ - INSERT INTO {table_name} - {insert_settings} VALUES {insert_values}; - """ - - values = [] - for i in range(count): - values += ( - [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] - ) - insert_values = ", ".join(values) - - return __format( - template, - table_name=table_name, - 
insert_settings=insert_settings, - insert_values=insert_values, - ) - - -def get_drop_tables_statements(tables): - return "".join( - [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] - ) - - -def get_logs_statement(args): - if args.get_logs: - return "SET send_logs_level='test';" - return "" - - -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() in ("yes", "true", "t", "y", "1"): - return True - elif v.lower() in ("no", "false", "f", "n", "0"): - return False - else: - raise argparse.ArgumentTypeError("Boolean value expected.") - - -class ArgsFactory: - def __init__(self, parser): - self.__parser = parser - - def add_opt_engine(self): - self.__parser.add_argument( - "--table-engine", - choices=["ReplicatedMergeTree", "MergeTree"], - default="MergeTree", - ) - - def add_opt_user_token(self): - self.__parser.add_argument( - "--use-insert-token", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_single_thread(self): - self.__parser.add_argument( - "--single-thread", type=str2bool, nargs="?", const=True, default=True - ) - - def add_opt_dedup_src(self): - self.__parser.add_argument( - "--deduplicate-src-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_dedup_dst(self): - self.__parser.add_argument( - "--deduplicate-dst-table", - type=str2bool, - nargs="?", - const=True, - default=True, - ) - - def add_opt_get_logs(self): - self.__parser.add_argument( - "--get-logs", type=str2bool, nargs="?", const=True, default=False - ) - - def add_opt_uniq_blocks(self): - self.__parser.add_argument( - "--insert-unique-blocks", type=str2bool, nargs="?", const=True, default=True - ) - - def add_opt_insert_method(self): - self.__parser.add_argument( - "--insert-method", - choices=["InsertSelect", "InsertValues"], - default="InsertSelect", - ) - - def add_all(self): - self.add_opt_engine() - self.add_opt_user_token() - self.add_opt_single_thread() - self.add_opt_dedup_src() - 
self.add_opt_dedup_dst() - self.add_opt_get_logs() - self.add_opt_insert_method() - self.add_opt_uniq_blocks() - - -def test_insert_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_statement = instance_create_statement( - table_name="table_when_b_even", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - create_mv_statement = """ - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even - AS - SELECT a, b - FROM table_a_b - WHERE b % 2 = 0; - """ - - drop_tables_statements = get_drop_tables_statements( - ["table_a_b", "table_when_b_even", "mv_b_even"] - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 10, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - print_details_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} - - SELECT 'table_when_b_even'; - SELECT 'count', count() FROM table_when_b_even; - {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 5 ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - else: - if args.use_insert_token: - 
assert_first_insert_statements = """ - SELECT throwIf( count() != 10 ) - FROM table_a_b; - SELECT throwIf( count() != 10 ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) - FROM table_a_b; - SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_a_b_statement} - - {create_table_when_b_even_statement} - - {create_mv_statement} - - -- first insert - {insert_statement} - - {print_details_statements} - - {assert_first_insert_statements} - - -- second insert, it is retry - {insert_statement} - - {print_details_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def test_mv_generates_several_blocks(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = [ - "table_for_join_with", - "table_a_b", - "table_when_b_even_and_joined", - "mv_b_even", - ] - drop_tables_statements = get_drop_tables_statements(tables) - - details_print_for_table_for_join_with = "" - if 
args.get_logs: - details_print_for_table_for_join_with = """ - SELECT 'table_for_join_with'; - SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; - """ - - create_table_a_b_statement = instance_create_statement( - table_name="table_a_b", - table_columns="(a_src String, b UInt64)", - table_keys="(a_src, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_when_b_even_and_joined_statement = instance_create_statement( - table_name="table_when_b_even_and_joined", - table_columns="(a_src String, a_join String, b UInt64)", - table_keys="(a_src, a_join, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_a_b", - 5, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_a_b'; - SELECT 'count', count() FROM table_a_b; - - SELECT 'table_when_b_even_and_joined'; - SELECT 'count', count() FROM table_when_b_even_and_joined; - {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 5 ) - FROM table_a_b; - - SELECT throwIf( count() != 9 ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) - FROM table_when_b_even_and_joined; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( 
count() != {5 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) - FROM table_when_b_even_and_joined; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) - FROM table_a_b; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) - FROM table_when_b_even_and_joined; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - CREATE TABLE table_for_join_with - (a_join String, b UInt64) - ENGINE = MergeTree() - ORDER BY (a_join, b); - INSERT INTO table_for_join_with - SELECT 'joined_' || toString(number), number - FROM numbers(1); - {details_print_for_table_for_join_with} - - {create_table_a_b_statement} - SYSTEM STOP MERGES table_a_b; - - {create_table_when_b_even_and_joined_statement} - SYSTEM STOP MERGES table_when_b_even_and_joined; - - CREATE MATERIALIZED VIEW mv_b_even - TO table_when_b_even_and_joined - AS - SELECT a_src, a_join, table_for_join_with.b as b - FROM table_a_b - FULL OUTER JOIN table_for_join_with - ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 - ORDER BY a_src, a_join, b; - - -- first insert - {insert_statement} - - {details_print_statements} - - -- first assertion - {assert_first_insert_statements} - - -- second insert - {insert_statement} - - {details_print_statements} - - -- second assertion - 
{assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def test_several_mv_into_one_table(parser): - ArgsFactory(parser).add_all() - - def calle(args): - tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] - drop_tables_statements = get_drop_tables_statements(tables) - - create_table_src_statement = instance_create_statement( - table_name="table_src", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_src_table, - ) - - create_table_dst_statement = instance_create_statement( - table_name="table_dst", - table_columns="(a String, b UInt64)", - table_keys="(a, b)", - table_engine=args.table_engine, - with_deduplication=args.deduplicate_dst_table, - ) - - insert_statement = instance_insert_statement( - "table_src", - 8, - args.insert_method, - args.insert_unique_blocks, - args.use_insert_token, - ) - - details_print_statements = f""" - SELECT 'table_src count', count() FROM table_src; - - SELECT 'table_dst count', count() FROM table_dst; - {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} - """ - - if args.insert_unique_blocks: - assert_first_insert_statements = f""" - SELECT throwIf( count() != 8 ) - FROM table_src; - - SELECT throwIf( count() != 6 ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) - FROM table_dst; - """ - else: - if args.use_insert_token: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {8 if 
args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - else: - assert_first_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) - FROM table_dst; - """ - assert_second_insert_statements = f""" - SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) - FROM table_src; - - SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) - FROM table_dst; - """ - - script = f""" - {get_logs_statement(args)} - - SET max_insert_threads={1 if args.single_thread else 10}; - SET update_insert_deduplication_token_in_dependent_materialized_views=1; - SET deduplicate_blocks_in_dependent_materialized_views=1; - - SET max_block_size=1; - SET min_insert_block_size_rows=0; - SET min_insert_block_size_bytes=0; - - {drop_tables_statements} - - {create_table_src_statement} - - {create_table_dst_statement} - - CREATE MATERIALIZED VIEW mv_b_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 2 = 0; - - CREATE MATERIALIZED VIEW mv_b_even_even - TO table_dst - AS - SELECT a, b - FROM table_src - WHERE b % 4 = 0; - - -- first insert - {insert_statement} - - {details_print_statements} - - {assert_first_insert_statements} - - -- second insert, retry - {insert_statement} - - {details_print_statements} - - {assert_second_insert_statements} - - {drop_tables_statements} - """ - - print(script) - - parser.set_defaults(func=calle) - - -def parse_args(): - parser = argparse.ArgumentParser() - subparsers = parser.add_subparsers(dest="test") - test_insert_several_blocks( - subparsers.add_parser("insert_several_blocks_into_table") - ) - test_mv_generates_several_blocks( - subparsers.add_parser("mv_generates_several_blocks") - ) - test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) - args = 
parser.parse_args() - if args.test is None: - parser.print_help() - return args - - -def main(): - args = parse_args() - if args.test is not None: - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference deleted file mode 100644 index 4893274c1cd..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference +++ /dev/null @@ -1,41 +0,0 @@ -Different materialized view insert into one underlayed table equal data. -first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -second attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Different insert operations generate the same data after transformation in underlied table of materialized view. -first attempt -from dst 1 A all_1_1_0 -from mv_dst 0 A all_1_1_0 -second attempt -from dst 1 A all_1_1_0 -from dst 2 A all_2_2_0 -from mv_dst 0 A all_1_1_0 -from mv_dst 0 A all_2_2_0 -Indentical blocks in insertion with `insert_deduplication_token` -first attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -second attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -third attempt -from dst 0 A all_1_1_0 -from dst 0 A all_2_2_0 -Indentical blocks in insertion -from dst 0 A all_1_1_0 -Indentical blocks after materialised view`s transformation -first attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 -second attempt -from dst 1 B all_1_1_0 -from dst 2 B all_2_2_0 -from mv_dst 0 B all_1_1_0 -from mv_dst 0 B all_2_2_0 diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql deleted file mode 100644 index 7927a6b1edf..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql +++ /dev/null @@ -1,331 +0,0 
@@ --- ######### -select 'Different materialized view insert into one underlayed table equal data.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; -DROP TABLE IF EXISTS mv_first; -DROP TABLE IF EXISTS mv_second; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE TABLE mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_first -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -CREATE MATERIALIZED VIEW mv_second -TO mv_dst -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_second; -DROP TABLE mv_first; -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Different insert operations generate the same data after transformation in underlied table of materialized view.'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst VALUES (1, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by 
all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst VALUES (2, 'A'); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion with `insert_deduplication_token`'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -select 'first attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -select 'third attempt'; - -INSERT INTO dst SELECT - 1 AS key, - 'b' AS value -FROM numbers(2) -SETTINGS insert_deduplication_token='some_user_token'; - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks in insertion'; - -DROP TABLE IF EXISTS dst; - -CREATE TABLE dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -INSERT INTO dst SELECT - 0 AS key, - 'A' AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -DROP TABLE dst; - - --- ######### -select 'Indentical blocks after materialised view`s transformation'; - -DROP TABLE IF EXISTS dst; -DROP TABLE IF EXISTS mv_dst; - -CREATE TABLE dst -( - 
`key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000; - -CREATE MATERIALIZED VIEW mv_dst -( - `key` Int64, - `value` String -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS non_replicated_deduplication_window=1000 -AS SELECT - 0 AS key, - value AS value -FROM dst; - -SET max_block_size=1; -SET min_insert_block_size_rows=0; -SET min_insert_block_size_bytes=0; - -SET deduplicate_blocks_in_dependent_materialized_views=1; - -select 'first attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -select 'second attempt'; - -INSERT INTO dst SELECT - number + 1 AS key, - IF(key = 0, 'A', 'B') AS value -FROM numbers(2); - -SELECT - 'from dst', - *, - _part -FROM dst -ORDER by all; - -SELECT - 'from mv_dst', - *, - _part -FROM mv_dst -ORDER by all; - -DROP TABLE mv_dst; -DROP TABLE dst; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference deleted file mode 100644 index c82a6eaa213..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference +++ /dev/null @@ -1,35 +0,0 @@ -no user deduplication token -partitioned_table is deduplicated bacause deduplication works in scope of one partiotion: -1 A -1 D -2 B -2 C -mv_table is not deduplicated because the inserted blocks was different: -1 A -1 A -1 D -2 B -2 B -2 C -with user deduplication token -partitioned_table is not deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -mv_table is not deduplicated because different tokens: -1 A -1 A -1 D -2 B -2 B -2 C -with incorrect ussage of user deduplication token -partitioned_table is deduplicated because equal tokens: -1 A -2 B -mv_table is 
deduplicated because equal tokens: -1 A -2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql deleted file mode 100644 index 2eb931f7f73..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql +++ /dev/null @@ -1,83 +0,0 @@ -DROP TABLE IF EXISTS partitioned_table; -DROP TABLE IF EXISTS mv_table; - - -SET deduplicate_blocks_in_dependent_materialized_views = 1; - - -SELECT 'no user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated bacause deduplication works in scope of one partiotion:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is not deduplicated because different tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is not deduplicated because different tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; - - -SELECT 'with incorrect ussage of user deduplication token'; - -CREATE TABLE partitioned_table - (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') - partition by key % 10 - order by tuple(); - -CREATE MATERIALIZED VIEW mv_table (key Int64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') - ORDER BY tuple() - AS SELECT key, value FROM partitioned_table; - -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); -INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); - -SELECT 'partitioned_table is deduplicated because equal tokens:'; -SELECT * FROM partitioned_table ORDER BY ALL; -SELECT 'mv_table is deduplicated because equal tokens:'; -SELECT * FROM mv_table ORDER BY ALL; - -DROP TABLE partitioned_table; -DROP TABLE mv_table; diff --git 
a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference deleted file mode 100644 index bf900aa84d2..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 
-table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False 
deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - 
-Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 
-table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test 
case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 
-table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh deleted file mode 100755 index 49eb52b47fd..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env 
bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference deleted file mode 100644 
index c815324b455..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 
-OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 
-table_when_b_even -count 10 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even 
-count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree 
use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 
10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 51: insert_method=InsertValues 
engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True 
insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -table_a_b -count 1 -table_when_b_even -count 1 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 5 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 1 -0 -0 -table_a_b -count 20 -table_when_b_even -count 1 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 10 -table_when_b_even -count 5 -0 -0 -table_a_b -count 20 -table_when_b_even -count 10 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 10 -table_when_b_even -count 10 -0 -0 -table_a_b -count 20 -table_when_b_even -count 20 -0 -0 
-OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh deleted file mode 100755 index 53af06d4a6f..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table 
$deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference deleted file mode 100644 index 6e76ec46aa8..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference +++ /dev/null @@ -1,962 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True 
-table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b 
-count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: 
insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False 
single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False 
insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined 
-count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 
20 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree 
use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh deleted file mode 100755 index 7d4f5240cd1..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference deleted file mode 100644 index a25e8713c61..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference +++ /dev/null @@ -1,962 +0,0 @@ 
- -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 6: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 12: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 18: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 24: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 30: 
insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 36: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 42: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 10 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 48: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 54: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 5 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 1 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 1 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -Test case 60: 
insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 9 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 2 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_a_b -count 5 -table_when_b_even_and_joined -count 9 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 18 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_a_b -count 5 -table_when_b_even_and_joined -count 10 -0 -0 -table_a_b -count 10 -table_when_b_even_and_joined -count 20 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh deleted file mode 100755 index 109d1674f3a..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference deleted file mode 100644 index b6a3e0175a7..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference +++ 
/dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: 
insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=MergeTree 
use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True 
deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False 
-table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src 
count 8 -table_dst count 12 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 
50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=MergeTree 
use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git 
a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh deleted file mode 100755 index fe3d610a758..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -ENGINE="MergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - 
--insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference deleted file mode 100644 index 1921103f49e..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference +++ /dev/null @@ -1,706 +0,0 @@ - -Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test 
case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 
-OK - -Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst 
count 12 -0 -0 -OK - -Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src 
count 1 -table_dst count 2 -0 -0 -OK - -Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst 
count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 
-table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 16 -0 -0 -OK - -Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 8 -table_dst count 32 -0 -0 -OK - -Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 16 -0 -0 -OK - -Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True 
-table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True 
insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 6 -0 -0 -OK - -Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False -table_src count 1 -table_dst count 2 -0 -0 -table_src count 1 -table_dst count 2 -0 -0 -OK - -Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 8 -table_dst count 12 -0 -0 -OK - -Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False -table_src count 1 -table_dst count 16 -0 -0 -table_src count 1 -table_dst count 32 -0 -0 -OK - -Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False 
deduplicate_dst_table=True insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 6 -0 -0 -OK - -Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False -table_src count 8 -table_dst count 2 -0 -0 -table_src count 16 -table_dst count 2 -0 -0 -OK - -Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True -table_src count 8 -table_dst count 6 -0 -0 -table_src count 16 -table_dst count 12 -0 -0 -OK - -Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False -table_src count 8 -table_dst count 16 -0 -0 -table_src count 16 -table_dst count 32 -0 -0 -OK - -All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh deleted file mode 100755 index 9adee6d53d4..00000000000 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -ENGINE="ReplicatedMergeTree" - -RUN_ONLY="" -#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" - -i=0 -for insert_method in "InsertSelect" "InsertValues"; do - for use_insert_token in "True" "False"; do - for single_thread in "True" "False"; do - for deduplicate_src_table in "True" "False"; do - for deduplicate_dst_table in "True" "False"; do - for insert_unique_blocks in "True" "False"; do - - THIS_RUN="Test case $i:" - THIS_RUN+=" insert_method=$insert_method" - THIS_RUN+=" engine=$ENGINE" - THIS_RUN+=" use_insert_token=$use_insert_token" - THIS_RUN+=" single_thread=$single_thread" - THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" - THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" - THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" - - i=$((i+1)) - - echo - if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then - echo "skip $THIS_RUN" - continue - fi - echo "$THIS_RUN" - - $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " - $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ - --insert-method $insert_method \ - --table-engine $ENGINE \ - --use-insert-token $use_insert_token \ - --single-thread $single_thread \ - --deduplicate-src-table $deduplicate_src_table \ - --deduplicate-dst-table $deduplicate_dst_table \ - --insert-unique-blocks $insert_unique_blocks \ - --get-logs false \ - ) - " && echo OK || echo FAIL - done - done - done - done - done -done - -echo -echo "All cases executed" diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index cedb651a430..1e6bfb414d8 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ 
DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o StorageFileSink | wc -l +" | grep -o MaterializingTransform | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh index b82ddb3813e..5d8eac082cf 100755 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" function test() diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference index ca98ec0963c..a9c785d1e48 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference @@ -2,525 +2,525 @@ Memory initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N 
-14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 4 UInt64 7 String 8 None -0 0 \N \N \N 
\N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 5 UInt64 8 String 9 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 
1970-01-23 0 alter modify column 3 1 Date 5 UInt64 8 String 9 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 5 UInt64 8 String 12 None -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N \N 3 \N \N -4 4 4 \N \N \N 4 \N \N -5 5 5 \N \N \N 5 \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N \N 12 \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N 
-22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree compact initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 
str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 
0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N 
\N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 
\N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 MergeTree wide initial insert alter add column 1 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter modify column 1 7 None 8 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert after alter modify column 1 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 
12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 alter modify column 2 8 None 11 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 1970-01-19 \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 insert after alter modify column 2 1 Date 1 UInt64 9 None 12 String -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 3 \N \N \N -4 4 4 4 \N \N \N -5 5 5 5 \N \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 12 \N \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N -15 15 \N \N \N \N \N -16 16 16 16 \N \N \N -17 17 str_17 str_17 \N \N \N -18 18 1970-01-19 
1970-01-19 \N \N \N -19 19 \N \N \N \N \N -20 20 20 \N 20 \N \N -21 21 str_21 str_21 \N \N \N -22 22 1970-01-23 \N \N 1970-01-23 \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 1 Date 1 UInt64 9 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify 
column 3 1 Date 1 UInt64 12 None 12 String -0 0 0 \N \N \N \N \N \N -1 1 1 \N \N \N \N \N \N -2 2 2 \N \N \N \N \N \N -3 3 3 \N \N 3 \N \N \N -4 4 4 \N \N 4 \N \N \N -5 5 5 \N \N 5 \N \N \N -6 6 6 \N \N str_6 \N \N \N -7 7 7 \N \N str_7 \N \N \N -8 8 8 \N \N str_8 \N \N \N -9 9 9 \N \N \N \N \N \N -10 10 10 \N \N \N \N \N \N -11 11 11 \N \N \N \N \N \N -12 12 12 \N \N 12 \N \N \N -13 13 13 \N \N str_13 \N \N \N -14 14 14 \N \N \N \N \N \N -15 15 15 \N \N \N \N \N \N -16 16 16 \N \N 16 \N \N \N -17 17 17 \N \N str_17 \N \N \N -18 18 18 \N \N 1970-01-19 \N \N \N -19 19 19 \N \N \N \N \N \N -20 20 20 \N \N \N 20 \N \N -21 21 21 \N \N str_21 \N \N \N -22 22 22 \N \N \N \N 1970-01-23 \N -23 \N \N \N \N \N \N \N \N -24 24 24 \N \N \N \N \N \N -25 str_25 \N str_25 \N \N \N \N \N +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference index 18a181464e9..f7c00bd8c44 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference @@ -2,181 +2,181 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 
4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 
[17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] MergeTree wide initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N 
\N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 alter rename column 1 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 insert nested dynamic 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] 
[NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] alter rename column 2 3 Array(Dynamic) 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N [] [] [] -1 1 \N \N \N \N \N [] [] [] -2 2 \N \N \N \N \N [] [] [] -3 3 3 \N 3 \N \N [] [] [] -4 4 4 \N 4 \N \N [] [] [] -5 5 5 \N 5 \N \N [] [] [] -6 6 str_6 str_6 \N \N \N [] [] [] -7 7 str_7 str_7 \N \N \N [] [] [] -8 8 str_8 str_8 \N \N \N [] [] [] -9 9 \N \N \N \N \N [] [] [] -10 10 \N \N \N \N \N [] [] [] -11 11 \N \N \N \N \N [] [] [] -12 12 12 \N 12 \N \N [] [] [] -13 13 str_13 str_13 \N \N \N [] [] [] -14 14 \N \N \N \N \N [] [] [] -15 15 [15] \N \N \N \N [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N \N [17] [NULL] [NULL] +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git 
a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference index b1ea186a917..0dab4ea0d20 100644 --- a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference @@ -2,55 +2,55 @@ MergeTree compact initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 MergeTree wide initial insert alter add column 3 None -0 0 \N \N \N \N -1 1 \N \N \N \N -2 2 \N \N \N \N +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 insert after alter add column 4 String 4 UInt64 7 None -0 0 \N \N \N \N \N -1 1 \N \N \N \N \N -2 2 \N \N \N \N \N -3 3 3 \N 3 \N \N -4 4 4 \N 4 \N \N -5 5 5 \N 5 \N \N -6 6 str_6 str_6 \N \N \N -7 7 str_7 str_7 \N \N \N -8 8 str_8 str_8 \N \N \N -9 9 \N \N \N \N \N -10 10 \N \N \N \N \N -11 11 \N \N \N \N \N -12 12 12 \N 12 \N \N -13 13 str_13 str_13 \N \N \N -14 14 \N \N \N \N \N +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 
12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 check table 1 diff --git a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference index 3ac6127fb21..2d33f7f6683 100644 --- a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference +++ b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference @@ -1,8 +1,8 @@ -100000000 140000000 -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 +100000000 100000000 0 0 -100000000 140000000 +100000000 100000000 0 0 0 0 1 diff --git a/tests/queries/0_stateless/03128_system_unload_primary_key.reference b/tests/queries/0_stateless/03128_system_unload_primary_key.reference index c7b40ae5b06..2646dc7247f 100644 --- a/tests/queries/0_stateless/03128_system_unload_primary_key.reference +++ b/tests/queries/0_stateless/03128_system_unload_primary_key.reference @@ -1,4 +1,4 @@ -100000000 140000000 -100000000 140000000 +100000000 100000000 +100000000 100000000 0 0 0 0 diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.reference b/tests/queries/0_stateless/03167_base64_url_functions_sh.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03167_base64_url_functions_sh.sh b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh new file mode 100755 index 00000000000..57060b8c525 --- /dev/null +++ b/tests/queries/0_stateless/03167_base64_url_functions_sh.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# shellcheck disable=SC2155 + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +urls=( + "http://www.example.com" + "https://secure.example.com" + "http://example.com" + "https://www.example.org" + "https://subdomain.example.com" + "http://sub.sub.example.com" + "http://192.168.1.1" + "https://[2001:db8:85a3:8d3:1319:8a2e:370:7348]" + "http://example.com:8080" + "https://example.com:443" + "http://example.com/path/to/page.html" + "https://example.com/path/with/trailing/slash/" + "http://example.com/search?q=query&lang=en" + "https://example.com/path?param1=value1¶m2=value2" + "http://example.com/page.html#section1" + "https://example.com/document.pdf#page=10" + "http://user:password@example.com" + "https://user@example.com" + "https://user:pass@sub.example.com:8080/path/page.html?query=123#fragment" + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=encode+this" + "http://例子.测试" + "https://mañana.com" + "http://example.com/%E2%82%AC" + "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" + "file:///C:/path/to/file.txt" + "file:///home/user/document.pdf" + "ftp://ftp.example.com/pub/file.zip" + "ftps://secure-ftp.example.com/private/doc.pdf" + "mailto:user@example.com" + "mailto:user@example.com?subject=Hello&body=How%20are%20you" + "git://github.com/user/repo.git" + "ssh://user@host.xz:port/path/to/repo.git" + "https://example.com/path(1)/[2]/{3}" + "http://example.com/path;param?query,value" + "" + "http://" + "example.com" + "http:" + "//" + "?query=value" + "#fragment" + "http://?#" + "http://xn--bcher-kva.ch" + "https://xn--bcher-kva.xn--tckwe/xn--8ws00zhy3a/%E6%B8%AC%E8%A9%A6.php?xn--o39an51a5phao35a=xn--mgbh0fb&xn--fiq228c5hs=test" + "https://xn--3e0b707e.xn--79-8kcre8v3a/%ED%85%8C%EC%8A%A4%ED%8A%B8/%ED%8C%8C%EC%9D%BC.jsp?xn--i1b6b1a6a2e=xn--9t4b11yi5a&xn--3e0b707e=xn--80aaa1cbgbm" + "https://example.com/path?param=value&special=!@#$%^&*()" + + "http://example.com/path/with/~tilde" + "https://example.com/path/with/\`backtick\`" + + 
"https://example.com/path?param1=value1¶m2=value2¶m3=value3#section1#section2" + "http://example.com/page?q1=v1&q2=v2#frag1#frag2#frag3" + + "https://example.com/☃/snowman" + "http://example.com/path/⽇本語" + "https://example.com/ü/ñ/path?q=ç" + + "https://example.com/path/to/very/long/url/that/exceeds/two/hundred/and/fifty/five/characters/lorem/ipsum/dolor/sit/amet/consectetur/adipiscing/elit/sed/do/eiusmod/tempor/incididunt/ut/labore/et/dolore/magna/aliqua/ut/enim/ad/minim/veniam/quis/nostrud/exercitation/ullamco/laboris/nisi/ut/aliquip/ex/ea/commodo/consequat" + + "https://example.com//path///to//file" + "http://example.com/path?param1=value1&¶m2=value2&&¶m3=value3" + + "http://example.com/%70%61%74%68?%70%61%72%61%6d=%76%61%6c%75%65#%66%72%61%67%6d%65%6e%74" + + "HtTpS://ExAmPlE.cOm/PaTh" + "http://EXAMPLE.COM/PATH" + + "http://127.0.0.1:8080/path" + "https://[::1]/path" + "http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080/path" + + "http://example.com:65535/path" + "https://example.com:0/path" + + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAACklEQVR4nGMAAQAABQABDQottAAAAABJRU5ErkJggg==" + + "https://user:password@example.com:8080/path?query=value#fragment" + "ftp://anonymous:password@ftp.example.com/pub/" + + "http://example.com/path%20with%20spaces" + "https://example.com/search?q=query%20with%20spaces" + + "https://www.mañana.com/path" + "http://例子.测试/path" + "https://рм.рф/path" + + "https://user:pass@sub.example.com:8080/p/a/t/h?query=123&key=value#fragid1" + + "jdbc:mysql://localhost:3306/database" + "market://details?id=com.example.app" + "tel:+1-816-555-1212" + "sms:+18165551212" + + "http://[1080:0:0:0:8:800:200C:417A]/index.html" + "https://[2001:db8::1428:57ab]:8080/path" + + "http://.." + "http://../" + "http://??" + "http://??/" + "http:///a" + "http://example.com??" 
+ "http://example.com??/" + "foo://example.com:8042/over/there?name=ferret#nose" + "//example.com/path" +) + + +base64URLEncode() { + echo -n "$1" | base64 -w0 | tr '+/' '-_' | tr -d '=' +} + +base64URLDecode() { + local len=$((${#1} % 4)) + local result="$1" + if [ $len -eq 2 ]; then result="$1"'==' + elif [ $len -eq 3 ]; then result="$1"'=' + fi + echo "$result" | tr '_-' '/+' | base64 -w0 -d +} + +test() { + local input="$1" + local encode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLEncode('$input')") + local encode_gold=$(base64URLEncode $input) + + local decode_ch=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode('$encode_gold')") + local decode_gold=$(base64URLDecode $encode_gold) + + if [ "$encode_ch" != "$encode_gold" ]; then + echo "Input: $input" + echo "Expected: $encode_gold" + echo "Got: $encode_ch" + fi + + if [ "$decode_ch" != "$input" ] || [ "$decode_ch" != "$decode_gold" ]; then + echo "Input: $input" + echo "Decode gold: $decode_gold" + echo "Got: $decode_ch" + fi +} + + +for url in "${urls[@]}"; do + test "$url" +done + +# special case for ' +decode=$(${CLICKHOUSE_CLIENT} --query="SELECT base64URLDecode(base64URLEncode('http://example.com/!$&\'()*+,;=:@/path'))") +if [ "$decode" != "http://example.com/!$&\'()*+,;=:@/path" ]; then + echo "Special case fail" + echo "Got: $decode" +fi diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference b/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference new file mode 100644 index 00000000000..306885a0974 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_1.reference @@ -0,0 +1,6 @@ +1 +0 +1 +0 +0 +0 diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql new file mode 100644 index 00000000000..75025dcadc8 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_1.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS t_read_in_order_1; + +CREATE 
TABLE t_read_in_order_1 (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS index_granularity = 1024, index_granularity_bytes = '10M'; + +INSERT INTO t_read_in_order_1 SELECT number, number FROM numbers(1000000); + +SET max_threads = 8; +SET optimize_read_in_order = 1; +SET read_in_order_use_buffering = 1; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SET read_in_order_use_buffering = 0; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +SELECT count() FROM +( + EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10 +) WHERE explain LIKE '%BufferChunks%'; + +DROP TABLE t_read_in_order_1; diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.reference b/tests/queries/0_stateless/03168_read_in_order_buffering_2.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql new file mode 100644 index 00000000000..1d3a75412e0 --- /dev/null +++ b/tests/queries/0_stateless/03168_read_in_order_buffering_2.sql @@ -0,0 +1,17 @@ +-- Tags: long, no-random-settings, no-tsan, no-asan, no-msan, no-s3-storage + +DROP TABLE IF EXISTS t_read_in_order_2; + +CREATE TABLE t_read_in_order_2 (id UInt64, v UInt64) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_read_in_order_2 SELECT number, number FROM 
numbers(10000000); +OPTIMIZE TABLE t_read_in_order_2 FINAL; + +SET optimize_read_in_order = 1; +SET max_threads = 4; +SET read_in_order_use_buffering = 1; +SET max_memory_usage = '100M'; + +SELECT * FROM t_read_in_order_2 ORDER BY id FORMAT Null; + +DROP TABLE t_read_in_order_2; diff --git a/tests/queries/0_stateless/03174_projection_deduplicate.reference b/tests/queries/0_stateless/03174_projection_deduplicate.reference new file mode 100644 index 00000000000..1796b2f1dee --- /dev/null +++ b/tests/queries/0_stateless/03174_projection_deduplicate.reference @@ -0,0 +1,3 @@ +1 one +1 one +1 one diff --git a/tests/queries/0_stateless/03174_projection_deduplicate.sql b/tests/queries/0_stateless/03174_projection_deduplicate.sql new file mode 100644 index 00000000000..46222b69dc7 --- /dev/null +++ b/tests/queries/0_stateless/03174_projection_deduplicate.sql @@ -0,0 +1,30 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/65548 +DROP TABLE IF EXISTS test_projection_deduplicate; + +CREATE TABLE test_projection_deduplicate +( + `id` Int32, + `string` String, + PROJECTION test_projection + ( + SELECT id + GROUP BY id + ) +) +ENGINE = MergeTree +PRIMARY KEY id; + +INSERT INTO test_projection_deduplicate VALUES (1, 'one'); +INSERT INTO test_projection_deduplicate VALUES (1, 'one'); + +OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE; -- { serverError NOT_IMPLEMENTED } + +SELECT * FROM test_projection_deduplicate; + +ALTER TABLE test_projection_deduplicate DROP PROJECTION test_projection; + +OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE; + +SELECT * FROM test_projection_deduplicate; + +DROP TABLE test_projection_deduplicate; diff --git a/tests/queries/0_stateless/03198_orc_read_time_zone.reference b/tests/queries/0_stateless/03198_orc_read_time_zone.reference new file mode 100644 index 00000000000..809dba44400 --- /dev/null +++ b/tests/queries/0_stateless/03198_orc_read_time_zone.reference @@ -0,0 +1 @@ +1 2024-06-30 20:00:00.000 diff --git 
a/tests/queries/0_stateless/03198_orc_read_time_zone.sh b/tests/queries/0_stateless/03198_orc_read_time_zone.sh new file mode 100755 index 00000000000..27530c06237 --- /dev/null +++ b/tests/queries/0_stateless/03198_orc_read_time_zone.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test(id UInt64, t DateTime64) Engine=MergeTree order by id" +$CLICKHOUSE_CLIENT -q "insert into test from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_read_use_writer_time_zone=true FORMAT ORC" +$CLICKHOUSE_CLIENT -q "select * from test SETTINGS session_timezone='Asia/Shanghai'" +$CLICKHOUSE_CLIENT -q "drop table test" \ No newline at end of file diff --git a/tests/queries/0_stateless/03199_has_lc_fixed_string.reference b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/03199_has_lc_fixed_string.sql b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql new file mode 100644 index 00000000000..3cb551804b7 --- /dev/null +++ b/tests/queries/0_stateless/03199_has_lc_fixed_string.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS 03199_fixedstring_array; +CREATE TABLE 03199_fixedstring_array (arr Array(LowCardinality(FixedString(8)))) ENGINE = Memory; +INSERT INTO 03199_fixedstring_array VALUES (['a', 'b']), (['c', 'd']); + +SELECT has(arr, toFixedString(materialize('a'), 1)) FROM 03199_fixedstring_array; + +DROP TABLE 03199_fixedstring_array; diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.reference b/tests/queries/0_stateless/03199_merge_filters_bug.reference new file mode 100644 index 00000000000..e69de29bb2d 
diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.sql b/tests/queries/0_stateless/03199_merge_filters_bug.sql new file mode 100644 index 00000000000..ed2ec2ea217 --- /dev/null +++ b/tests/queries/0_stateless/03199_merge_filters_bug.sql @@ -0,0 +1,70 @@ +drop table if exists t1; +drop table if exists t2; + +CREATE TABLE t1 +( + `s1` String, + `s2` String, + `s3` String +) +ENGINE = MergeTree +ORDER BY tuple(); + + +CREATE TABLE t2 +( + `fs1` FixedString(10), + `fs2` FixedString(10) +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO t1 SELECT + repeat('t', 15) s1, + 'test' s2, + 'test' s3; + +INSERT INTO t1 SELECT + substring(s1, 1, 10), + s2, + s3 +FROM generateRandom('s1 String, s2 String, s3 String') +LIMIT 10000; + +INSERT INTO t2 SELECT * +FROM generateRandom() +LIMIT 10000; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0; + +optimize table t1 final; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')); diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference new file mode 100644 index 00000000000..10ce589000d --- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.reference @@ -0,0 +1,27 @@ +5 (4230072075578472911,4230072075578472911) 71789584853496063 +2 (4401188181514187637,4401188181514187637) 878466845199253299 +4 (4940826638032106783,4940826638032106783) 3675164899122807807 +6 (10957420562507184961,10957420562507184961) 3732623117916254211 +0 (797076400500506358,797076400500506358) 3746094338409299772 +7 (10843611042193511775,10843611042193511775) 4607251742847087615 +3 
(12588286986351526898,12588286986351526898) 13889114719560662796 +8 (452995860660674674,452995860660674674) 17365664920787500812 +9 (12206106972241516904,12206106972241516904) 17567684527097330880 +1 (14558425114501132193,14558425114501132193) 18445898820068822019 +3 255 255 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 +1 +2 +3 +4 +5 +6 diff --git a/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql new file mode 100644 index 00000000000..c32d7524492 --- /dev/null +++ b/tests/queries/0_stateless/03199_queries_with_new_analyzer.sql @@ -0,0 +1,41 @@ +SET allow_experimental_analyzer=1; + +SELECT *, ngramMinHash(*) AS minhash, mortonEncode(untuple(ngramMinHash(*))) AS z +FROM (SELECT toString(number) FROM numbers(10)) +ORDER BY z LIMIT 100; + +CREATE TABLE test ( + idx UInt64, + coverage Array(UInt64), + test_name String +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO test VALUES (10, [0,1,2,3], 'xx'), (20, [3,4,5,6], 'xxx'), (90, [3,4,5,6,9], 'xxxx'); + +WITH + 4096 AS w, 4096 AS h, w * h AS pixels, + arrayJoin(coverage) AS num, + num DIV (32768 * 32768 DIV pixels) AS idx, + mortonDecode(2, idx) AS coord, + 255 AS b, + least(255, uniq(test_name)) AS r, + 255 * uniq(test_name) / (max(uniq(test_name)) OVER ()) AS g +SELECT r::UInt8, g::UInt8, b::UInt8 +FROM test +GROUP BY coord +ORDER BY coord.2 * w + coord.1 +WITH FILL FROM 0 TO 10; + + +CREATE TABLE seq ( + number UInt64 +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO seq VALUES (0), (6), (7); + +WITH (Select min(number), max(number) from seq) as range Select * from numbers(range.1, range.2); + diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference b/tests/queries/0_stateless/03199_unbin_buffer_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh 
b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh new file mode 100755 index 00000000000..337debebb14 --- /dev/null +++ b/tests/queries/0_stateless/03199_unbin_buffer_overflow.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# check for buffer overflow in unbin (due to not enough memory preallocated for output buffer) +# we iterate over all remainders of input string length modulo word_size and check that no assertions are triggered + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=8 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unbin(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(toFixedString(materialize('$str'), $i)) WITH ROLLUP WITH TOTALS FORMAT NULL" +done + +word_size=2 +for i in $(seq 1 $((word_size+1))); do + str=$(printf "%${i}s" | tr ' ' 'x') + $CLICKHOUSE_CLIENT -q "SELECT count() FROM numbers(99) GROUP BY unhex(materialize('$str')) WITH ROLLUP WITH TOTALS FORMAT NULL" +done diff --git a/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.reference b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.sql b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.sql new file mode 100644 index
00000000000..2dd0a37657d --- /dev/null +++ b/tests/queries/0_stateless/03200_subcolumns_join_use_nulls.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t_subcolumns_join; + +CREATE TABLE t_subcolumns_join (id UInt64) ENGINE=MergeTree ORDER BY tuple(); + +INSERT INTO t_subcolumns_join SELECT number as number FROM numbers(10000); + +SELECT + count() +FROM (SELECT number FROM numbers(10)) as tbl LEFT JOIN t_subcolumns_join ON number = id +WHERE id is null +SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 1, join_use_nulls = 1; + +DROP TABLE t_subcolumns_join; diff --git a/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.reference b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.reference new file mode 100644 index 00000000000..912bff45da5 --- /dev/null +++ b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.reference @@ -0,0 +1,11 @@ +str_array Array(String) +1318 +5779 +1715 +6422 +5875 +1887 +3763 +4245 +4270 +758 diff --git a/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.sh b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.sh new file mode 100755 index 00000000000..dcecd7b3bea --- /dev/null +++ b/tests/queries/0_stateless/03201_avro_negative_block_size_arrays.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +DATA_DIR=$CUR_DIR/data_avro + +# See https://github.com/ClickHouse/ClickHouse/issues/60438 +$CLICKHOUSE_LOCAL -q "DESC file('$DATA_DIR/negative_block_size_arrays.avro')" +$CLICKHOUSE_LOCAL -q "SELECT arraySum(arrayMap(x -> length(x), str_array)) AS res FROM file('$DATA_DIR/negative_block_size_arrays.avro')" diff --git a/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference new file mode 100644 index 00000000000..62f5eb45106 --- /dev/null +++ b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.reference @@ -0,0 +1,24 @@ +QUERY id: 0 + PROJECTION COLUMNS + (sumIf(toInt64(1), 1)) Tuple(Int64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: tuple, function_type: ordinary, result_type: Tuple(Int64) + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: sumIf, function_type: aggregate, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Int64_1, constant_value_type: Int64 + EXPRESSION + FUNCTION id: 7, function_name: toInt64, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8 + SETTINGS optimize_rewrite_sum_if_to_count_if=1 diff --git a/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql new file mode 100644 index 00000000000..24369fd6497 --- /dev/null +++ b/tests/queries/0_stateless/03201_sumIf_to_countIf_return_type.sql @@ -0,0 +1,2 @@ +SET allow_experimental_analyzer = 1; +EXPLAIN QUERY TREE SELECT tuple(sumIf(toInt64(1), 1)) FROM 
numbers(100) settings optimize_rewrite_sum_if_to_count_if=1; diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference new file mode 100644 index 00000000000..8565fe3d0fa --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.reference @@ -0,0 +1,402 @@ +Memory +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] 
[1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree compact +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] 
[] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 +24 +26 +27 +29 +30 +32 +33 +35 +MergeTree wide +test +[] 1 0 0 [] +1 0 1 0 [] +\N 1 1 0 [] +['str_3','str_3','str_3'] 1 0 3 [1,1,1] +4 0 1 0 [] +\N 1 1 0 [] +[6,6,6,6,6,6] 1 0 6 [0,0,0,0,0,0] +7 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 9 [1,1,1,1,1,1,1,1,1] +10 0 1 0 [] +\N 1 1 0 [] +['str_12','str_12'] 1 0 2 [1,1] +13 0 1 0 [] +\N 1 1 0 [] +[15,15,15,15,15] 1 0 5 [0,0,0,0,0] +16 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 8 [1,1,1,1,1,1,1,1] +19 0 1 0 [] +\N 1 1 0 [] +['str_21'] 1 0 1 [1] +22 0 1 0 [] +\N 1 1 0 [] +[24,24,24,24] 1 0 4 [0,0,0,0] +25 0 1 0 [] +\N 1 1 0 [] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL] 1 0 7 [1,1,1,1,1,1,1] +28 0 1 0 [] +\N 1 1 0 [] +[] 1 0 0 [] +31 0 1 0 [] +\N 1 1 0 [] +[33,33,33] 1 0 3 [0,0,0] +34 0 1 0 [] +\N 1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [1,1,1] +0 1 0 [] +1 1 0 [] +1 0 6 [0,0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 9 [1,1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 2 [1,1] +0 1 0 [] +1 1 0 [] +1 0 5 [0,0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 8 [1,1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 1 [1] +0 1 0 [] +1 1 0 [] +1 0 4 [0,0,0,0] +0 1 0 [] +1 1 0 [] +1 0 7 [1,1,1,1,1,1,1] +0 1 0 [] +1 1 0 [] +1 0 0 [] +0 1 0 [] +1 1 0 [] +1 0 3 [0,0,0] +0 1 0 [] +1 1 0 [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [1,1,1] [0,0,0] +1 0 [] [] +1 0 [] [] +0 6 [0,0,0,0,0,0] [1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 9 [1,1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 2 [1,1] [0,0] +1 0 [] [] +1 0 [] [] +0 5 [0,0,0,0,0] [1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 8 [1,1,1,1,1,1,1,1] [1,1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 1 [1] [0] +1 0 [] [] +1 0 [] [] +0 4 [0,0,0,0] [1,1,1,1] +1 0 [] [] +1 0 [] [] +0 7 [1,1,1,1,1,1,1] [1,1,1,1,1,1,1] +1 0 [] [] +1 0 [] [] +0 0 [] [] +1 0 [] [] +1 0 [] [] +0 3 [0,0,0] [1,1,1] +1 0 [] [] +1 0 [] [] +0 +2 +3 +5 +6 +8 +9 +11 +12 +14 +15 +17 +18 +20 +21 +23 
+24 +26 +27 +29 +30 +32 +33 +35 diff --git a/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh new file mode 100755 index 00000000000..8231691e184 --- /dev/null +++ b/tests/queries/0_stateless/03201_variant_null_map_subcolumn.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(36)" + $CH_CLIENT -q "select v, v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id" + + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(1000000) settings min_insert_block_size_rows=100000" + $CH_CLIENT -q "select v, v.UInt64.null, 
v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.UInt64.null, v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null from test order by id format Null" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`.null, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64.null, v.\`Array(Variant(String, UInt64))\`.String.null from test order by id format Null" + $CH_CLIENT -q "select id from test where v.UInt64 is null order by id format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference new file mode 100644 index 00000000000..8740726c7ef --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference @@ -0,0 +1,57 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None 
+String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +MergeTree wide +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh new file mode 100755 index 00000000000..aa06e48376c --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, number from numbers(10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(10, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(20, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, NULL from numbers(30, 10) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(40, 40) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(10, 10) settings 
min_insert_block_size_rows=50000" + + $CH_CLIENT -q "select distinct dynamicType(d) as type from test order by type" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'UInt64'" + $CH_CLIENT -q "select count() from test where d.UInt64 is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'String'" + $CH_CLIENT -q "select count() from test where d.String is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Date'" + $CH_CLIENT -q "select count() from test where d.Date is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Variant(String, UInt64))\`)" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Array(Dynamic))\`)" + $CH_CLIENT -q "select count() from test where d is NULL" + $CH_CLIENT -q "select count() from test where not empty(d.\`Tuple(a Array(Dynamic))\`.a.String)" + + $CH_CLIENT -q "select d, d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" + $CH_CLIENT -q "select d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" + $CH_CLIENT -q "select d.Int8.null, d.Date.null, d.\`Array(String)\`.null from test format Null" + $CH_CLIENT -q "select d, d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.String.null from test format Null" + $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, 
d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" + $CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64.null, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/data_avro/negative_block_size_arrays.avro b/tests/queries/0_stateless/data_avro/negative_block_size_arrays.avro new file mode 100644 index 00000000000..ec785a885dc Binary files /dev/null and b/tests/queries/0_stateless/data_avro/negative_block_size_arrays.avro differ diff --git a/tests/queries/0_stateless/data_orc/test_reader_time_zone.snappy.orc b/tests/queries/0_stateless/data_orc/test_reader_time_zone.snappy.orc new file mode 100644 index 00000000000..ab1b785dbbf Binary files /dev/null and b/tests/queries/0_stateless/data_orc/test_reader_time_zone.snappy.orc differ diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index fa2bfef935a..78c4b6bde95 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ 
b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2758 +personal_ws-1.1 en 2942 AArch ACLs ALTERs @@ -449,7 +449,7 @@ Kahan Kaser KeeperAliveConnections KeeperMap -KeeperOutstandingRequets +KeeperOutstandingRequests Kerberos Khanna KittenHouse @@ -1098,6 +1098,8 @@ aggregatefunction aggregatingmergetree aggregatio aggretate +aggthrow +aggThrow aiochclient allocator alphaTokens @@ -1656,9 +1658,9 @@ fsync func fuzzBits fuzzJSON +fuzzQuery fuzzer fuzzers -fuzzQuery gRPC gccMurmurHash gcem @@ -1980,6 +1982,8 @@ mapExtractKeyLike mapFilter mapFromArrays mapKeys +mapPartialReverseSort +mapPartialSort mapPopulateSeries mapReverseSort mapSort @@ -1996,6 +2000,7 @@ maxMap maxintersections maxintersectionsposition maxmap +maxMappedArrays maxmind mdadm meanZTest @@ -2013,6 +2018,7 @@ metrica metroHash mfedotov minMap +minMappedArrays minSampleSizeContinuous minSampleSizeConversion mindsdb @@ -2120,8 +2126,10 @@ noaa nonNegativeDerivative noop normalizeQuery +normalizeQueryKeepNames normalizeUTF normalizedQueryHash +normalizedQueryHashKeepNames notEmpty notEquals notILike @@ -2795,6 +2803,7 @@ tupleModulo tupleModuloByNumber tupleMultiply tupleMultiplyByNumber +tupleNames tupleNegate tuplePlus tupleToNameValuePairs diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8112ed9083b..271065a78fb 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 @@ -6,6 +7,7 @@ v24.5.1.1763-stable 2024-06-01 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.5.46-lts 2024-07-03 v24.3.4.147-lts 2024-06-13 v24.3.3.102-lts 2024-05-01 v24.3.2.23-lts 2024-04-03