diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 83e22c0b7c6..4d57ae450c4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -21,6 +21,9 @@ jobs: EOF - name: Check out repository code uses: actions/checkout@v2 + with: + # Always use the most recent script version + ref: master - name: Download packages and push to Artifactory run: | rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH" diff --git a/.gitmodules b/.gitmodules index c46b1c736fc..979f347e6ad 100644 --- a/.gitmodules +++ b/.gitmodules @@ -265,6 +265,9 @@ [submodule "contrib/hashidsxx"] path = contrib/hashidsxx url = https://github.com/schoentoon/hashidsxx.git +[submodule "contrib/nats-io"] + path = contrib/nats-io + url = https://github.com/ClickHouse/nats.c.git [submodule "contrib/vectorscan"] path = contrib/vectorscan url = https://github.com/VectorCamp/vectorscan.git diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index e8c9c58aca3..da7b3c81f32 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -40,10 +40,16 @@ if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse clie echo "Dataset already downloaded" else echo "Will download the dataset" + if [ "`uname`" = "Darwin" ] + then + ./clickhouse client --receive_timeout 1000 --max_insert_threads $(sysctl -n hw.ncpu) --progress --query " + CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) + AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" + else ./clickhouse client --receive_timeout 1000 --max_insert_threads $(nproc || 4) --progress --query " CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" - + fi ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits" fi @@ -63,8 +69,8 @@ QUERY_NUM=1 cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do sync - if [ "${OS}" = "Darwin" ] - then + if [ "`uname`" = "Darwin" ] + then sudo purge > /dev/null else echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null @@ -90,8 +96,8 @@ echo touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt -if [ "${OS}" = "Darwin" ] -then +if [ "`uname`" = "Darwin" ] +then echo '----Version, build id-----------' ./clickhouse local --query "SELECT format('Version: {}', version())" ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw @@ -211,4 +217,31 @@ TO benchmark; GRANT INSERT ON benchmark_runs TO benchmark; GRANT INSERT ON benchmark_results TO benchmark; +Example query: + +SELECT + cpu_model, + threads, + instance, + k +FROM +( + SELECT + run_id, + exp(avg(log(adjusted_time / best_time))) AS k + FROM + ( + WITH greatest(time, 0.01) AS adjusted_time + SELECT + run_id, + adjusted_time, + min(adjusted_time) OVER (PARTITION BY query_num, try_num) AS best_time + FROM benchmark_results + WHERE try_num > 1 + ) + GROUP BY run_id + ORDER BY k ASC +) AS t +INNER JOIN benchmark_runs USING (run_id) + //// diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1dd28fa90ff..d4a3f164214 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -134,6 +134,7 @@ add_contrib (krb5-cmake krb5) add_contrib 
(cyrus-sasl-cmake cyrus-sasl) # for krb5 add_contrib (libgsasl-cmake libgsasl) # requires krb5 add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl +add_contrib (nats-io-cmake nats-io) add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5 add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3 add_contrib (cppkafka-cmake cppkafka) diff --git a/contrib/libprotobuf-mutator b/contrib/libprotobuf-mutator index ffd86a32874..a304ec48dcf 160000 --- a/contrib/libprotobuf-mutator +++ b/contrib/libprotobuf-mutator @@ -1 +1 @@ -Subproject commit ffd86a32874e5c08a143019aad1aaf0907294c9f +Subproject commit a304ec48dcf15d942607032151f7e9ee504b5dcf diff --git a/contrib/libprotobuf-mutator-cmake/CMakeLists.txt b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt index a623f95c418..9bbd6c17caa 100644 --- a/contrib/libprotobuf-mutator-cmake/CMakeLists.txt +++ b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt @@ -14,8 +14,11 @@ add_library(_protobuf-mutator ${LIBRARY_DIR}/src/text_format.cc ${LIBRARY_DIR}/src/utf8_fix.cc) -target_include_directories(_protobuf-mutator BEFORE INTERFACE "${LIBRARY_DIR}") -target_include_directories(_protobuf-mutator BEFORE INTERFACE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") +# codegen_select_fuzzer includes ... +target_include_directories(_protobuf-mutator BEFORE PUBLIC "${LIBRARY_DIR}/src") +# ... which includes +target_include_directories(_protobuf-mutator BEFORE PUBLIC "${LIBRARY_DIR}") +target_include_directories(_protobuf-mutator BEFORE PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") target_link_libraries(_protobuf-mutator ch_contrib::protobuf) diff --git a/contrib/nats-io b/contrib/nats-io new file mode 160000 index 00000000000..6b2227f3675 --- /dev/null +++ b/contrib/nats-io @@ -0,0 +1 @@ +Subproject commit 6b2227f36757da090321e2d317569d2bd42c4cc1 diff --git a/contrib/nats-io-cmake/CMakeLists.txt b/contrib/nats-io-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5588d5750c4 --- /dev/null +++ b/contrib/nats-io-cmake/CMakeLists.txt @@ -0,0 +1,59 @@ +option (ENABLE_NATS "Enable NATS" ${ENABLE_LIBRARIES}) + +if (OS_FREEBSD) + set(ENABLE_NATS OFF) + message (STATUS "Using internal nats-io library on FreeBSD is not supported") +endif() + +if (NOT ENABLE_NATS) + message(STATUS "Not using nats-io") + return() +endif() + +set(NATS_IO_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/nats-io/src") + +if(UNIX) + set(NATS_PLATFORM_INCLUDE "unix") +elseif(WIN32) + set(NATS_PLATFORM_INCLUDE "apple") +endif() + +file(GLOB PS_SOURCES "${NATS_IO_SOURCE_DIR}/${NATS_PLATFORM_INCLUDE}/*.c") +set(SRCS + "${NATS_IO_SOURCE_DIR}/asynccb.c" + "${NATS_IO_SOURCE_DIR}/buf.c" + "${NATS_IO_SOURCE_DIR}/comsock.c" + "${NATS_IO_SOURCE_DIR}/conn.c" + "${NATS_IO_SOURCE_DIR}/crypto.c" + "${NATS_IO_SOURCE_DIR}/hash.c" + "${NATS_IO_SOURCE_DIR}/js.c" + "${NATS_IO_SOURCE_DIR}/jsm.c" + "${NATS_IO_SOURCE_DIR}/kv.c" + "${NATS_IO_SOURCE_DIR}/msg.c" + "${NATS_IO_SOURCE_DIR}/nats.c" + "${NATS_IO_SOURCE_DIR}/natstime.c" + "${NATS_IO_SOURCE_DIR}/nkeys.c" + "${NATS_IO_SOURCE_DIR}/nuid.c" + "${NATS_IO_SOURCE_DIR}/opts.c" + "${NATS_IO_SOURCE_DIR}/parser.c" + "${NATS_IO_SOURCE_DIR}/pub.c" + "${NATS_IO_SOURCE_DIR}/srvpool.c" + "${NATS_IO_SOURCE_DIR}/stats.c" + "${NATS_IO_SOURCE_DIR}/status.c" + "${NATS_IO_SOURCE_DIR}/sub.c" + "${NATS_IO_SOURCE_DIR}/timer.c" + "${NATS_IO_SOURCE_DIR}/url.c" + "${NATS_IO_SOURCE_DIR}/util.c" +) + +add_library(_nats_io ${SRCS} ${PS_SOURCES}) +add_library(ch_contrib::nats_io ALIAS _nats_io) + 
+target_include_directories(_nats_io SYSTEM PUBLIC ${NATS_IO_SOURCE_DIR}) +target_include_directories(_nats_io SYSTEM PUBLIC ${NATS_IO_SOURCE_DIR}/adapters) +target_include_directories(_nats_io SYSTEM PUBLIC ${NATS_IO_SOURCE_DIR}/include) +target_include_directories(_nats_io SYSTEM PUBLIC ${NATS_IO_SOURCE_DIR}/${NATS_PLATFORM_INCLUDE}) + +target_link_libraries(_nats_io + PRIVATE OpenSSL::Crypto OpenSSL::SSL ch_contrib::uv +) diff --git a/contrib/poco b/contrib/poco index 0e32cb42db7..9fec8e11dbb 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 0e32cb42db76ddaa76848470219056908053b676 +Subproject commit 9fec8e11dbb6a352e1cfba8cc9e23ebd7fb77310 diff --git a/contrib/simdjson b/contrib/simdjson index de196dd7a3a..1075e8609c4 160000 --- a/contrib/simdjson +++ b/contrib/simdjson @@ -1 +1 @@ -Subproject commit de196dd7a3a16e4056b0551ffa3b85c2f52581e1 +Subproject commit 1075e8609c4afa253162d441437af929c29e31bb diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 995cecfebc7..1dff4b1a2d4 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -97,13 +97,24 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH -ARG NFPM_VERSION=2.15.1 +ARG NFPM_VERSION=2.16.0 RUN arch=${TARGETARCH:-amd64} \ && curl -Lo /tmp/nfpm.deb "https://github.com/goreleaser/nfpm/releases/download/v${NFPM_VERSION}/nfpm_${arch}.deb" \ && dpkg -i /tmp/nfpm.deb \ && rm /tmp/nfpm.deb +ARG GO_VERSION=1.18.3 +# We need go for clickhouse-diagnostics +RUN arch=${TARGETARCH:-amd64} \ + && curl -Lo /tmp/go.tgz "https://go.dev/dl/go${GO_VERSION}.linux-${arch}.tar.gz" \ + && tar -xzf /tmp/go.tgz -C /usr/local/ \ + && rm /tmp/go.tgz + +ENV PATH="$PATH:/usr/local/go/bin" +ENV GOPATH=/workdir/go +ENV GOCACHE=/workdir/ + RUN mkdir /workdir && chmod 777 /workdir WORKDIR /workdir diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index b8d11e9c293..270c93c105c 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -29,8 +29,21 @@ env if [ -n "$MAKE_DEB" ]; then rm -rf /build/packages/root + if [ -z "$SANITIZER" ]; then + # We need to check if clickhouse-diagnostics is fine and build it + ( + cd /build/programs/diagnostics + make test-no-docker + GOARCH="${DEB_ARCH}" CGO_ENABLED=0 make VERSION="$VERSION_STRING" build + mv clickhouse-diagnostics .. + ) + else + echo -e "#!/bin/sh\necho 'Not implemented for this type of package'" > /build/programs/clickhouse-diagnostics + chmod +x /build/programs/clickhouse-diagnostics + fi fi + cache_status # clear cache stats ccache --zero-stats ||: @@ -81,6 +94,8 @@ if [ -n "$MAKE_DEB" ]; then # No quotes because I want it to expand to nothing if empty. # shellcheck disable=SC2086 DESTDIR=/build/packages/root ninja $NINJA_FLAGS install + cp /build/programs/clickhouse-diagnostics /build/packages/root/usr/bin + cp /build/programs/clickhouse-diagnostics /output bash -x /build/packages/build fi diff --git a/docker/server/README.md b/docker/server/README.md index c074a1bac00..2ff08620658 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -2,131 +2,138 @@ ## What is ClickHouse? -ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time. +ClickHouse is an open-source column-oriented database management system that allows the generation of analytical data reports in real-time. 
-ClickHouse manages extremely large volumes of data in a stable and sustainable manner. It currently powers [Yandex.Metrica](https://metrica.yandex.com/), world’s [second largest](http://w3techs.com/technologies/overview/traffic_analysis/all) web analytics platform, with over 13 trillion database records and over 20 billion events a day, generating customized reports on-the-fly, directly from non-aggregated data. This system was successfully implemented at [CERN’s LHCb experiment](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) to store and process metadata on 10bn events with over 1000 attributes per event registered in 2011. +ClickHouse manages extremely large volumes of data. It currently powers [Yandex.Metrica](https://metrica.yandex.com/), the world’s [second-largest](http://w3techs.com/technologies/overview/traffic_analysis/all) web analytics platform, with over 13 trillion database records and over 20 billion events a day, generating customized reports on-the-fly, directly from non-aggregated data. This system was successfully implemented at [CERN’s LHCb experiment](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) to store and process metadata on 10bn events with over 1000 attributes per event registered in 2011. For more information and documentation see https://clickhouse.com/. ## How to use this image ### start server instance + ```bash -$ docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server +docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server ``` -By default ClickHouse will be accessible only via docker network. See the [networking section below](#networking). +By default, ClickHouse will be accessible only via the Docker network. See the [networking section below](#networking). -By default, starting above server instance will be run as default user without password. +By default, starting above server instance will be run as the `default` user without a password. ### connect to it from a native client + ```bash -$ docker run -it --rm --link some-clickhouse-server:clickhouse-server --entrypoint clickhouse-client clickhouse/clickhouse-server --host clickhouse-server +docker run -it --rm --link some-clickhouse-server:clickhouse-server --entrypoint clickhouse-client clickhouse/clickhouse-server --host clickhouse-server # OR -$ docker exec -it some-clickhouse-server clickhouse-client +docker exec -it some-clickhouse-server clickhouse-client ``` -More information about [ClickHouse client](https://clickhouse.com/docs/en/interfaces/cli/). +More information about the [ClickHouse client](https://clickhouse.com/docs/en/interfaces/cli/). ### connect to it using curl ```bash echo "SELECT 'Hello, ClickHouse!'" | docker run -i --rm --link some-clickhouse-server:clickhouse-server curlimages/curl 'http://clickhouse-server:8123/?query=' -s --data-binary @- ``` + More information about [ClickHouse HTTP Interface](https://clickhouse.com/docs/en/interfaces/http/). 
-### stopping / removing the containter +### stopping / removing the container ```bash -$ docker stop some-clickhouse-server -$ docker rm some-clickhouse-server +docker stop some-clickhouse-server +docker rm some-clickhouse-server ``` ### networking -You can expose you ClickHouse running in docker by [mapping particular port](https://docs.docker.com/config/containers/container-networking/) from inside container to a host ports: +You can expose your ClickHouse running in docker by [mapping a particular port](https://docs.docker.com/config/containers/container-networking/) from inside the container using host ports: ```bash -$ docker run -d -p 18123:8123 -p19000:9000 --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server -$ echo 'SELECT version()' | curl 'http://localhost:18123/' --data-binary @- +docker run -d -p 18123:8123 -p19000:9000 --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server +echo 'SELECT version()' | curl 'http://localhost:18123/' --data-binary @- 20.12.3.3 ``` -or by allowing container to use [host ports directly](https://docs.docker.com/network/host/) using `--network=host` (also allows archiving better network performance): +or by allowing the container to use [host ports directly](https://docs.docker.com/network/host/) using `--network=host` (also allows archiving better network performance): ```bash -$ docker run -d --network=host --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server -$ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @- +docker run -d --network=host --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server +echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @- 20.12.3.3 ``` ### Volumes -Typically you may want to mount the following folders inside your container to archieve persistency: +Typically you may want to mount the following folders inside your container to achieve persistency: * `/var/lib/clickhouse/` - main folder where ClickHouse stores the data -* `/val/log/clickhouse-server/` - logs +* `/var/log/clickhouse-server/` - logs ```bash -$ docker run -d \ - -v $(realpath ./ch_data):/var/lib/clickhouse/ \ - -v $(realpath ./ch_logs):/var/log/clickhouse-server/ \ - --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server +docker run -d \ + -v $(realpath ./ch_data):/var/lib/clickhouse/ \ + -v $(realpath ./ch_logs):/var/log/clickhouse-server/ \ + --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server ``` You may also want to mount: * `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustmenets -* `/etc/clickhouse-server/usert.d/*.xml` - files with use settings adjustmenets +* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustmenets * `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). ### Linux capabilities -ClickHouse has some advanced functionality which requite enabling several [linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). +ClickHouse has some advanced functionality, which requires enabling several [Linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). 
-It is optional and can be enabled using the following [docker command line agruments](https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities): +These are optional and can be enabled using the following [docker command-line arguments](https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities): ```bash -$ docker run -d \ - --cap-add=SYS_NICE --cap-add=NET_ADMIN --cap-add=IPC_LOCK \ - --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server +docker run -d \ + --cap-add=SYS_NICE --cap-add=NET_ADMIN --cap-add=IPC_LOCK \ + --name some-clickhouse-server --ulimit nofile=262144:262144 clickhouse/clickhouse-server ``` ## Configuration -Container exposes 8123 port for [HTTP interface](https://clickhouse.com/docs/en/interfaces/http_interface/) and 9000 port for [native client](https://clickhouse.com/docs/en/interfaces/tcp/). +The container exposes port 8123 for the [HTTP interface](https://clickhouse.com/docs/en/interfaces/http_interface/) and port 9000 for the [native client](https://clickhouse.com/docs/en/interfaces/tcp/). -ClickHouse configuration represented with a file "config.xml" ([documentation](https://clickhouse.com/docs/en/operations/configuration_files/)) +ClickHouse configuration is represented with a file "config.xml" ([documentation](https://clickhouse.com/docs/en/operations/configuration_files/)) ### Start server instance with custom configuration + ```bash -$ docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 -v /path/to/your/config.xml:/etc/clickhouse-server/config.xml clickhouse/clickhouse-server +docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 -v /path/to/your/config.xml:/etc/clickhouse-server/config.xml clickhouse/clickhouse-server ``` -### Start server as custom user -``` +### Start server as a custom user + +```bash # $(pwd)/data/clickhouse should exist and be owned by current user -$ docker run --rm --user ${UID}:${GID} --name some-clickhouse-server --ulimit nofile=262144:262144 -v "$(pwd)/logs/clickhouse:/var/log/clickhouse-server" -v "$(pwd)/data/clickhouse:/var/lib/clickhouse" clickhouse/clickhouse-server +docker run --rm --user ${UID}:${GID} --name some-clickhouse-server --ulimit nofile=262144:262144 -v "$(pwd)/logs/clickhouse:/var/log/clickhouse-server" -v "$(pwd)/data/clickhouse:/var/lib/clickhouse" clickhouse/clickhouse-server ``` -When you use the image with mounting local directories inside you probably would like to not mess your directory tree with files owner and permissions. Then you could use `--user` argument. In this case, you should mount every necessary directory (`/var/lib/clickhouse` and `/var/log/clickhouse-server`) inside the container. Otherwise, image will complain and not start. + +When you use the image with local directories mounted, you probably want to specify the user to maintain the proper file ownership. Use the `--user` argument and mount `/var/lib/clickhouse` and `/var/log/clickhouse-server` inside the container. Otherwise, the image will complain and not start. 
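[Editor's illustration, not part of the diff above: a minimal sketch of the custom-user setup described in the previous paragraph. The directory names mirror the README example; `id -u`/`id -g` are used here in place of the `UID`/`GID` shell variables.]

```bash
# Pre-create the mounted directories so they exist and are owned by the current user;
# otherwise the server cannot write to them when started with --user.
mkdir -p "$(pwd)/data/clickhouse" "$(pwd)/logs/clickhouse"

docker run --rm --user "$(id -u):$(id -g)" \
  --name some-clickhouse-server --ulimit nofile=262144:262144 \
  -v "$(pwd)/logs/clickhouse:/var/log/clickhouse-server" \
  -v "$(pwd)/data/clickhouse:/var/lib/clickhouse" \
  clickhouse/clickhouse-server
```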
### Start server from root (useful in case of userns enabled) -``` -$ docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-server-userns -v "$(pwd)/logs/clickhouse:/var/log/clickhouse-server" -v "$(pwd)/data/clickhouse:/var/lib/clickhouse" clickhouse/clickhouse-server + +```bash +docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-server-userns -v "$(pwd)/logs/clickhouse:/var/log/clickhouse-server" -v "$(pwd)/data/clickhouse:/var/lib/clickhouse" clickhouse/clickhouse-server ``` ### How to create default database and user on starting -Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`: +Sometimes you may want to create a user (user named `default` is used by default) and database on image start. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`: -``` -$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp clickhouse/clickhouse-server +```bash +docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp clickhouse/clickhouse-server ``` ## How to extend this image -If you would like to do additional initialization in an image derived from this one, add one or more `*.sql`, `*.sql.gz`, or `*.sh` scripts under `/docker-entrypoint-initdb.d`. After the entrypoint calls `initdb` it will run any `*.sql` files, run any executable `*.sh` scripts, and source any non-executable `*.sh` scripts found in that directory to do further initialization before starting the service. -Also you can provide environment variables `CLICKHOUSE_USER` & `CLICKHOUSE_PASSWORD` that will be used for clickhouse-client during initialization. +To perform additional initialization in an image derived from this one, add one or more `*.sql`, `*.sql.gz`, or `*.sh` scripts under `/docker-entrypoint-initdb.d`. After the entrypoint calls `initdb`, it will run any `*.sql` files, run any executable `*.sh` scripts, and source any non-executable `*.sh` scripts found in that directory to do further initialization before starting the service. +Also, you can provide environment variables `CLICKHOUSE_USER` & `CLICKHOUSE_PASSWORD` that will be used for clickhouse-client during initialization. For example, to add an additional user and database, add the following to `/docker-entrypoint-initdb.d/init-db.sh`: @@ -135,11 +142,12 @@ For example, to add an additional user and database, add the following to `/dock set -e clickhouse client -n <<-EOSQL - CREATE DATABASE docker; - CREATE TABLE docker.docker (x Int32) ENGINE = Log; + CREATE DATABASE docker; + CREATE TABLE docker.docker (x Int32) ENGINE = Log; EOSQL ``` ## License View [license information](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) for the software contained in this image. 
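[Editor's illustration, not part of the diff above: returning to the initialization section, a rough sketch of how an init script under `/docker-entrypoint-initdb.d/` can use the `CLICKHOUSE_USER`/`CLICKHOUSE_PASSWORD` variables passed to `docker run`. The database and table names are purely illustrative, and passing the credentials explicitly may be redundant since the entrypoint already applies them to clickhouse-client.]

```bash
#!/bin/bash
# /docker-entrypoint-initdb.d/init-db.sh (illustrative)
set -e

# The same environment variables given to `docker run -e CLICKHOUSE_USER=... -e CLICKHOUSE_PASSWORD=...`
# are visible here, so the script can authenticate with them when running extra initialization SQL.
clickhouse client --user "${CLICKHOUSE_USER:-default}" --password "${CLICKHOUSE_PASSWORD:-}" -n <<-EOSQL
    CREATE DATABASE IF NOT EXISTS docker;
    CREATE TABLE IF NOT EXISTS docker.docker (x Int32) ENGINE = Log;
EOSQL
```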
+ diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index d6bd458a01b..a124d95b360 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -63,6 +63,7 @@ RUN python3 -m pip install \ PyMySQL \ aerospike==4.0.0 \ avro==1.10.2 \ + asyncio \ cassandra-driver \ confluent-kafka==1.5.0 \ dict2xml \ @@ -75,6 +76,7 @@ RUN python3 -m pip install \ kazoo \ lz4 \ minio \ + nats-py \ protobuf \ psycopg2-binary==2.8.6 \ pymongo==3.11.0 \ diff --git a/docker/test/integration/runner/compose/docker_compose_nats.yml b/docker/test/integration/runner/compose/docker_compose_nats.yml new file mode 100644 index 00000000000..19ae4c162b1 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_nats.yml @@ -0,0 +1,7 @@ +version: '2.3' +services: + nats1: + image: nats + ports: + - "${NATS_EXTERNAL_PORT}:${NATS_INTERNAL_PORT}" + command: "-p 4444 --user click --pass house" \ No newline at end of file diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 0b517fb4af8..b3ee4e62f20 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -110,7 +110,8 @@ function stop() # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. kill -TERM "$(pidof gdb)" ||: sleep 5 - gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" ||: + echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log + gdb -batch -ex 'thread apply all backtrace' -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log clickhouse stop --force } @@ -119,9 +120,10 @@ function start() counter=0 until clickhouse-client --query "SELECT 1" do - if [ "$counter" -gt ${1:-240} ] + if [ "$counter" -gt ${1:-120} ] then echo "Cannot start clickhouse-server" + echo -e "Cannot start clickhouse-server\tFAIL" >> /test_output/test_results.tsv cat /var/log/clickhouse-server/stdout.log tail -n1000 /var/log/clickhouse-server/stderr.log tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | grep -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n1000 @@ -295,6 +297,10 @@ then # Start server from previous release configure + + # Avoid "Setting allow_deprecated_database_ordinary is neither a builtin setting..." + rm -f /etc/clickhouse-server/users.d/database_ordinary.xml ||: + start clickhouse-client --query="SELECT 'Server version: ', version()" diff --git a/docs/_includes/cmake_in_clickhouse_footer.md b/docs/_includes/cmake_in_clickhouse_footer.md deleted file mode 100644 index bf8411ba815..00000000000 --- a/docs/_includes/cmake_in_clickhouse_footer.md +++ /dev/null @@ -1,121 +0,0 @@ - -## Developer's guide for adding new CMake options - -### Don't be obvious. Be informative. - -Bad: -```cmake -option (ENABLE_TESTS "Enables testing" OFF) -``` - -This description is quite useless as is neither gives the viewer any additional information nor explains the option purpose. - -Better: - -```cmake -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -If the option's purpose can't be guessed by its name, or the purpose guess may be misleading, or option has some -pre-conditions, leave a comment above the `option()` line and explain what it does. -The best way would be linking the docs page (if it exists). -The comment is parsed into a separate column (see below). 
- -Even better: - -```cmake -# implies ${TESTS_ARE_ENABLED} -# see tests/CMakeLists.txt for implementation detail. -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -### If the option's state could produce unwanted (or unusual) result, explicitly warn the user. - -Suppose you have an option that may strip debug symbols from the ClickHouse's part. -This can speed up the linking process, but produces a binary that cannot be debugged. -In that case, prefer explicitly raising a warning telling the developer that he may be doing something wrong. -Also, such options should be disabled if applies. - -Bad: -```cmake -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions. - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() - -``` -Better: - -```cmake -# Provides faster linking and lower binary size. -# Tradeoff is the inability to debug some source files with e.g. gdb -# (empty stack frames and no local variables)." -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions." - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - message(WARNING "Not generating debugger info for ClickHouse functions") - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() -``` - -### In the option's description, explain WHAT the option does rather than WHY it does something. - -The WHY explanation should be placed in the comment. -You may find that the option's name is self-descriptive. - -Bad: - -```cmake -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better: - -```cmake -# Only applicable for clang. -# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). -``` - -### Don't assume other developers know as much as you do. - -In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to -the tool's docs. It won't take much of your time. - -Bad: - -```cmake -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better (combined with the above hint): - -```cmake -# https://clang.llvm.org/docs/ThinLTO.html -# Only applicable for clang. -# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). -``` - -Other example, bad: - -```cmake -option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF) -``` - -Better: - -```cmake -# https://github.com/include-what-you-use/include-what-you-use -option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF) -``` - -### Prefer consistent default values. - -CMake allows you to pass a plethora of values representing boolean `true/false`, e.g. `1, ON, YES, ...`. -Prefer the `ON/OFF` values, if possible. diff --git a/docs/_includes/cmake_in_clickhouse_header.md b/docs/_includes/cmake_in_clickhouse_header.md deleted file mode 100644 index 2f2e0421946..00000000000 --- a/docs/_includes/cmake_in_clickhouse_header.md +++ /dev/null @@ -1,27 +0,0 @@ -# CMake in ClickHouse - -## TL; DR How to make ClickHouse compile and link faster? - -Minimal ClickHouse build example: - -```bash -cmake .. 
\ - -DCMAKE_C_COMPILER=$(which clang-14) \ - -DCMAKE_CXX_COMPILER=$(which clang++-14) \ - -DCMAKE_BUILD_TYPE=Debug \ - -DENABLE_UTILS=OFF \ - -DENABLE_TESTS=OFF -``` - -## CMake files types - -1. ClickHouse's source CMake files (located in the root directory and in `/src`). -2. Arch-dependent CMake files (located in `/cmake/*os_name*`). -3. Libraries finders (search for contrib libraries, located in `/contrib/*/CMakeLists.txt`). -3. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`) - -## List of CMake flags - -* This list is auto-generated by [this Python script](https://github.com/clickhouse/clickhouse/blob/master/docs/tools/cmake_in_clickhouse_generator.py). -* The flag name is a link to its position in the code. -* If an option's default value is itself an option, it's also a link to its position in this list. diff --git a/docs/_includes/install/tgz.sh b/docs/_includes/install/tgz.sh index 4ba5890b32b..d6d7cd8bc36 100644 --- a/docs/_includes/install/tgz.sh +++ b/docs/_includes/install/tgz.sh @@ -1,20 +1,34 @@ LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) export LATEST_VERSION -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" -tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" +case $(uname -m) in + x86_64) ARCH=amd64 ;; + aarch64) ARCH=arm64 ;; + *) echo "Unknown architecture $(uname -m)"; exit 1 ;; +esac + +for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client +do + curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION-${ARCH}.tgz" \ + || curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz" +done + +exit 0 + +tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" sudo /etc/init.d/clickhouse-server start -tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 4f06c52a1b5..dbb90f8e537 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -164,18 +164,3 @@ ClickHouse is available in pre-built binaries and packages. Binaries are portabl They are built for stable, prestable and testing releases as long as for every commit to master and for every pull request. 
To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green check mark or red cross near commit, and click to the “Details” link right after “ClickHouse Build Check”.
-
-## Faster builds for development: Split build configuration {#split-build}
-
-Normally, ClickHouse is statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that on every change the entire binary needs to be linked, which is slow and may be inconvenient for development. There is an alternative configuration which instead creates dynamically loaded shared libraries and separate binaries `clickhouse-server`, `clickhouse-client` etc., allowing for faster incremental builds. To use it, add the following flags to your `cmake` invocation:
-```
--DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1
-```
-
-Note that the split build has several drawbacks:
-* There is no single `clickhouse` binary, and you have to run `clickhouse-server`, `clickhouse-client`, etc.
-* Risk of segfault if you run any of the programs while rebuilding the project.
-* You cannot run the integration tests since they only work a single complete binary.
-* You can't easily copy the binaries elsewhere. Instead of moving a single binary you'll need to copy all binaries and libraries.
-
-[Original article](https://clickhouse.com/docs/en/development/build/)
diff --git a/docs/en/development/cmake-in-clickhouse.md b/docs/en/development/cmake-in-clickhouse.md
deleted file mode 100644
index 83279f5f69a..00000000000
--- a/docs/en/development/cmake-in-clickhouse.md
+++ /dev/null
@@ -1,545 +0,0 @@
----
-sidebar_position: 69
-sidebar_label: CMake in ClickHouse
-description: How to make ClickHouse compile and link faster
----
-
-# CMake in ClickHouse
-
-How to make ClickHouse compile and link faster. Minimal ClickHouse build example:
-
-```bash
-cmake .. \
-    -DCMAKE_C_COMPILER=$(which clang-13) \
-    -DCMAKE_CXX_COMPILER=$(which clang++-13) \
-    -DCMAKE_BUILD_TYPE=Debug \
-    -DENABLE_UTILS=OFF \
-    -DENABLE_TESTS=OFF
-```
-
-## CMake files types
-
-1. ClickHouse source CMake files (located in the root directory and in /src).
-2. Arch-dependent CMake files (located in /cmake/*os_name*).
-3. Libraries finders (search for contrib libraries, located in /contrib/*/CMakeLists.txt).
-4. Contrib build CMake files (used instead of libraries' own CMake files, located in /cmake/modules)
-
-## List of CMake flags
-- The flag name is a link to its position in the code.
-- If an option's default value is itself an option, it's also a link to its position in this list.
-
-## ClickHouse modes
-
-| Name | Default value | Description | Comment |
-|:-|:-|:-|:-|
-| ENABLE_CLICKHOUSE_ALL | ON | Enable all ClickHouse modes by default | The clickhouse binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), each of them may be built and linked as a separate library. If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only. |
-| ENABLE_CLICKHOUSE_BENCHMARK | ENABLE_CLICKHOUSE_ALL | Queries benchmarking mode | https://clickhouse.com/docs/en/operations/utilities/clickhouse-benchmark/ |
-| ENABLE_CLICKHOUSE_CLIENT | ENABLE_CLICKHOUSE_ALL | Client mode (interactive tui/shell that connects to the server) | |
-| ENABLE_CLICKHOUSE_COMPRESSOR | ENABLE_CLICKHOUSE_ALL | Data compressor and decompressor | https://clickhouse.com/docs/en/operations/utilities/clickhouse-compressor/ |
-| ENABLE_CLICKHOUSE_COPIER | ENABLE_CLICKHOUSE_ALL | Inter-cluster data copying mode | https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/ |
-| ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG | ENABLE_CLICKHOUSE_ALL | Configs processor (extract values etc.) | |
-| ENABLE_CLICKHOUSE_FORMAT | ENABLE_CLICKHOUSE_ALL | Queries pretty-printer and formatter with syntax highlighting | |
-| ENABLE_CLICKHOUSE_GIT_IMPORT | ENABLE_CLICKHOUSE_ALL | A tool to analyze Git repositories | https://presentations.clickhouse.com/matemarketing_2020/ |
-| ENABLE_CLICKHOUSE_INSTALL | OFF | Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only) | |
-| ENABLE_CLICKHOUSE_KEEPER | ENABLE_CLICKHOUSE_ALL | ClickHouse alternative to ZooKeeper | |
-| ENABLE_CLICKHOUSE_KEEPER_CONVERTER | ENABLE_CLICKHOUSE_ALL | Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot | |
-| ENABLE_CLICKHOUSE_LIBRARY_BRIDGE | ENABLE_CLICKHOUSE_ALL | HTTP-server working like a proxy to Library dictionary source | |
-| ENABLE_CLICKHOUSE_LOCAL | ENABLE_CLICKHOUSE_ALL | Local files fast processing mode | https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/ |
-| ENABLE_CLICKHOUSE_OBFUSCATOR | ENABLE_CLICKHOUSE_ALL | Table data obfuscator (convert real data to benchmark-ready one) | https://clickhouse.com/docs/en/operations/utilities/clickhouse-obfuscator/ |
-| ENABLE_CLICKHOUSE_ODBC_BRIDGE | ENABLE_CLICKHOUSE_ALL | HTTP-server working like a proxy to ODBC driver | |
-| ENABLE_CLICKHOUSE_SERVER | ENABLE_CLICKHOUSE_ALL | Server mode (main mode) | |
-| ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER | ENABLE_CLICKHOUSE_ALL | A tool to export table data files to be later put to a static files web server | |
-
-## External libraries
-Note that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.
-
-| Name | Default value | Description | Comment |
-|:-|:-|:-|:-|
-| ENABLE_AVX | 0 | Use AVX instructions on x86_64 | |
-| ENABLE_AVX2 | 0 | Use AVX2 instructions on x86_64 | |
-| ENABLE_AVX2_FOR_SPEC_OP | 0 | Use avx2 instructions for specific operations on x86_64 | |
-| ENABLE_AVX512 | 0 | Use AVX512 instructions on x86_64 | |
-| ENABLE_AVX512_FOR_SPEC_OP | 0 | Use avx512 instructions for specific operations on x86_64 | |
-| ENABLE_BMI | 0 | Use BMI instructions on x86_64 | |
-| ENABLE_CCACHE | ENABLE_CCACHE_BY_DEFAULT | Speedup re-compilations using ccache (external tool) | https://ccache.dev/ |
-| ENABLE_CLANG_TIDY | OFF | Use clang-tidy static analyzer | https://clang.llvm.org/extra/clang-tidy/ |
-| ENABLE_PCLMULQDQ | 1 | Use pclmulqdq instructions on x86_64 | |
-| ENABLE_POPCNT | 1 | Use popcnt instructions on x86_64 | |
-| ENABLE_SSE41 | 1 | Use SSE4.1 instructions on x86_64 | |
-| ENABLE_SSE42 | 1 | Use SSE4.2 instructions on x86_64 | |
-| ENABLE_SSSE3 | 1 | Use SSSE3 instructions on x86_64 | |
-
-## Other flags
-
-| Name | Default value | Description | Comment |
-|:-|:-|:-|:-|
-| ADD_GDB_INDEX_FOR_GOLD | OFF | Add .gdb-index to resulting binaries for gold linker. | Ignored if lld is used |
-| ARCH_NATIVE | 0 | Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use. | |
-| BUILD_STANDALONE_KEEPER | OFF | Build keeper as small standalone binary | |
-| CLICKHOUSE_SPLIT_BINARY | OFF | Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled | |
-| COMPILER_PIPE | ON | -pipe compiler option | Less /tmp usage, more RAM usage. |
-| ENABLE_BUILD_PATH_MAPPING | ON | Enable remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). It's to generate reproducible builds. See https://reproducible-builds.org/docs/build-path | Reproducible builds If turned ON, remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). |
-| ENABLE_CHECK_HEAVY_BUILDS | OFF | Don't allow C++ translation units to compile too long or to take too much memory while compiling. | Take care to add prlimit in command line before ccache, or else ccache thinks that prlimit is compiler, and clang++ is its input file, and refuses to work with multiple inputs, e.g in ccache log: [2021-03-31T18:06:32.655327 36900] Command line: /usr/bin/ccache prlimit --as=10000000000 --data=5000000000 --cpu=600 /usr/bin/clang++-11 - ...... std=gnu++2a -MD -MT src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -MF src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o.d -o src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -c ../src/Storages/MergeTree/IMergeTreeDataPart.cpp [2021-03-31T18:06:32.656704 36900] Multiple input files: /usr/bin/clang++-11 and ../src/Storages/MergeTree/IMergeTreeDataPart.cpp Another way would be to use --ccache-skip option before clang++-11 to make ccache ignore it. |
-| ENABLE_COLORED_BUILD | ON | Enable colored diagnostics in build log. | |
-| ENABLE_EXAMPLES | OFF | Build all example programs in 'examples' subdirectories | |
-| ENABLE_FUZZING | OFF | Fuzzy testing using libfuzzer | |
-| ENABLE_LIBRARIES | ON | Enable all external libraries by default | Turns on all external libs like s3, kafka, ODBC, ... |
-| ENABLE_MULTITARGET_CODE | ON | Enable platform-dependent code | ClickHouse developers may use platform-dependent code under some macro (e.g. ifdef ENABLE_MULTITARGET). If turned ON, this option defines such macro. See src/Functions/TargetSpecific.h |
-| ENABLE_TESTS | ON | Provide unit_test_dbms target with Google.Test unit tests | If turned ON, assumes the user has either the system GTest library or the bundled one. |
-| ENABLE_THINLTO | ON | Clang-specific link time optimization | https://clang.llvm.org/docs/ThinLTO.html Applies to clang only. Disabled when building with tests or sanitizers. |
-| FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION | ON | Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) but is not possible to satisfy | If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. |
-| GLIBC_COMPATIBILITY | ON | Enable compatibility with older glibc libraries. | Only for Linux, x86_64 or aarch64. |
-| SPLIT_DEBUG_SYMBOLS | OFF | Build stripped binaries with debug info in separate directory | |
-| LINKER_NAME | OFF | Linker name or full path | Example values: lld-10, gold. |
-| PARALLEL_COMPILE_JOBS | "" | Maximum number of concurrent compilation jobs | 1 if not set |
-| PARALLEL_LINK_JOBS | "" | Maximum number of concurrent link jobs | 1 if not set |
-| SANITIZE | "" | Enable one of the code sanitizers | Possible values: - address (ASan) - memory (MSan) - thread (TSan) - undefined (UBSan) - "" (no sanitizing) |
-| SPLIT_SHARED_LIBRARIES | OFF | Keep all internal libraries as separate .so files | DEVELOPER ONLY. Faster linking if turned on. |
-| STRIP_DEBUG_SYMBOLS_FUNCTIONS | STRIP_DSF_DEFAULT | Do not generate debugger info for ClickHouse functions | Provides faster linking and lower binary size. Tradeoff is the inability to debug some source files with e.g. gdb (empty stack frames and no local variables). |
-| USE_DEBUG_HELPERS | USE_DEBUG_HELPERS | Enable debug helpers | |
-| USE_STATIC_LIBRARIES | ON | Disable to use shared libraries | |
-| USE_UNWIND | ENABLE_LIBRARIES | Enable libunwind (better stacktraces) | |
-| WERROR | OFF | Enable -Werror compiler option | Using system libs can cause a lot of warnings in includes (on macro expansion). |
-| WITH_COVERAGE | OFF | Profile the resulting binary/binaries | Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc |
- -## Developer's guide for adding new CMake options - -#### Don't be obvious. Be informative. - -Bad: - -``` -option (ENABLE_TESTS "Enables testing" OFF) -``` - -This description is quite useless as it neither gives the viewer any additional information nor explains the option purpose. - -Better: - -``` -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -If the option's purpose can't be guessed by its name, or the purpose guess may be misleading, or option has some -pre-conditions, leave a comment above the option() line and explain what it does. -The best way would be linking the docs page (if it exists). -The comment is parsed into a separate column (see below). - -Even better: - -``` -# implies ${TESTS_ARE_ENABLED} -# see tests/CMakeLists.txt for implementation detail. -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -#### If the option's state could produce unwanted (or unusual) result, explicitly warn the user. - -Suppose you have an option that may strip debug symbols from the ClickHouse part. -This can speed up the linking process, but produces a binary that cannot be debugged. -In that case, prefer explicitly raising a warning telling the developer that he may be doing something wrong. -Also, such options should be disabled if applies. - -Bad: - -``` -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions. - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() -``` - -Better: - -``` -# Provides faster linking and lower binary size. -# Tradeoff is the inability to debug some source files with e.g. gdb -# (empty stack frames and no local variables)." -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions." - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - message(WARNING "Not generating debugger info for ClickHouse functions") - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() -``` - -#### In the option's description, explain WHAT the option does rather than WHY it does something. -The WHY explanation should be placed in the comment. You may find that the option's name is self-descriptive. - -Bad: - -``` -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better: - -``` -# Only applicable for clang. -# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). -``` - -#### Don't assume other developers know as much as you do. -In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to -the tool's docs. It won't take much of your time. - -Bad: - -``` -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better (combined with the above hint): - -``` -# https://clang.llvm.org/docs/ThinLTO.html -# Only applicable for clang. -# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). 
-``` - -Other example, bad: - -``` -option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF) -``` - -Better: - -``` -# https://github.com/include-what-you-use/include-what-you-use -option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF) -``` - -#### Prefer consistent default values. -CMake allows you to pass a plethora of values representing boolean true/false, e.g. 1, ON, YES, .... - -Prefer the ON/OFF values, if possible. - diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index f8fcead3ca2..7b2da4416d6 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -123,12 +123,10 @@ Builds ClickHouse in various configurations for use in further steps. You have t - **Build log**: link to the building and files copying log, useful when build failed. - **Build time**. - **Artifacts**: build result files (with `XXX` being the server version e.g. `20.8.1.4344`). - - `clickhouse-client_XXX_all.deb` + - `clickhouse-client_XXX_amd64.deb` - `clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb` - `clickhouse-common-staticXXX_amd64.deb` - - `clickhouse-server_XXX_all.deb` - - `clickhouse_XXX_amd64.buildinfo` - - `clickhouse_XXX_amd64.changes` + - `clickhouse-server_XXX_amd64.deb` - `clickhouse`: Main built binary. - `clickhouse-odbc-bridge` - `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests. diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 8c1f6b5fc9e..13af1be5097 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -6,93 +6,14 @@ description: A list of third-party libraries used # Third-Party Libraries Used -The list of third-party libraries: - -| Library name | License type | -|:-|:-| -| abseil-cpp | [Apache](https://github.com/ClickHouse-Extras/abseil-cpp/blob/4f3b686f86c3ebaba7e4e926e62a79cb1c659a54/LICENSE) | -| AMQP-CPP | [Apache](https://github.com/ClickHouse-Extras/AMQP-CPP/blob/1a6c51f4ac51ac56610fa95081bd2f349911375a/LICENSE) | -| arrow | [Apache](https://github.com/ClickHouse-Extras/arrow/blob/078e21bad344747b7656ef2d7a4f7410a0a303eb/LICENSE.txt) | -| avro | [Apache](https://github.com/ClickHouse-Extras/avro/blob/e43c46e87fd32eafdc09471e95344555454c5ef8/LICENSE.txt) | -| aws | [Apache](https://github.com/ClickHouse-Extras/aws-sdk-cpp/blob/7d48b2c8193679cc4516e5bd68ae4a64b94dae7d/LICENSE.txt) | -| aws-c-common | [Apache](https://github.com/ClickHouse-Extras/aws-c-common/blob/736a82d1697c108b04a277e66438a7f4e19b6857/LICENSE) | -| aws-c-event-stream | [Apache](https://github.com/ClickHouse-Extras/aws-c-event-stream/blob/3bc33662f9ccff4f4cbcf9509cc78c26e022fde0/LICENSE) | -| aws-checksums | [Apache](https://github.com/ClickHouse-Extras/aws-checksums/blob/519d6d9093819b6cf89ffff589a27ef8f83d0f65/LICENSE) | -| base58 | [MIT](https://github.com/ClickHouse/base-x/blob/3e58874643c087f57e82b0ff03825c933fab945a/LICENSE) | -| base64 | [BSD 2-clause](https://github.com/ClickHouse-Extras/Turbo-Base64/blob/af9b331f2b4f30b41c70f3a571ff904a8251c1d3/LICENSE) | -| boost | [Boost](https://github.com/ClickHouse-Extras/boost/blob/9cf09dbfd55a5c6202dedbdf40781a51b02c2675/LICENSE_1_0.txt) | -| boringssl | [BSD](https://github.com/ClickHouse-Extras/boringssl/blob/a6a2e2ab3e44d97ce98e51c558e989f211de7eb3/LICENSE) | -| brotli | [MIT](https://github.com/google/brotli/blob/63be8a99401992075c23e99f7c84de1c653e39e2/LICENSE) | -| capnproto | 
[MIT](https://github.com/capnproto/capnproto/blob/a00ccd91b3746ef2ab51d40fe3265829949d1ace/LICENSE) | -| cassandra | [Apache](https://github.com/ClickHouse-Extras/cpp-driver/blob/eb9b68dadbb4417a2c132ad4a1c2fa76e65e6fc1/LICENSE.txt) | -| cctz | [Apache](https://github.com/ClickHouse-Extras/cctz/blob/c0f1bcb97fd2782f7c3f972fadd5aad5affac4b8/LICENSE.txt) | -| cityhash102 | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/cityhash102/COPYING) | -| cppkafka | [BSD 2-clause](https://github.com/mfontanini/cppkafka/blob/5a119f689f8a4d90d10a9635e7ee2bee5c127de1/LICENSE) | -| croaring | [Apache](https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/LICENSE) | -| curl | [Apache](https://github.com/curl/curl/blob/3b8bbbbd1609c638a3d3d0acb148a33dedb67be3/docs/LICENSE-MIXING.md) | -| cyrus-sasl | [BSD 2-clause](https://github.com/ClickHouse-Extras/cyrus-sasl/blob/e6466edfd638cc5073debe941c53345b18a09512/COPYING) | -| double-conversion | [BSD 3-clause](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) | -| dragonbox | [Apache](https://github.com/ClickHouse-Extras/dragonbox/blob/923705af6fd953aa948fc175f6020b15f7359838/LICENSE-Apache2-LLVM) | -| fast_float | [Apache](https://github.com/fastfloat/fast_float/blob/7eae925b51fd0f570ccd5c880c12e3e27a23b86f/LICENSE) | -| fastops | [MIT](https://github.com/ClickHouse-Extras/fastops/blob/88752a5e03cf34639a4a37a4b41d8b463fffd2b5/LICENSE) | -| flatbuffers | [Apache](https://github.com/ClickHouse-Extras/flatbuffers/blob/eb3f827948241ce0e701516f16cd67324802bce9/LICENSE.txt) | -| fmtlib | [Unknown](https://github.com/fmtlib/fmt/blob/c108ee1d590089ccf642fc85652b845924067af2/LICENSE.rst) | -| gcem | [Apache](https://github.com/kthohr/gcem/blob/8d4f1b5d76ea8f6ff12f3f4f34cda45424556b00/LICENSE) | -| googletest | [BSD 3-clause](https://github.com/google/googletest/blob/e7e591764baba0a0c3c9ad0014430e7a27331d16/LICENSE) | -| grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) | -| h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) | -| vectorscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/73695e419c27af7fe2a099c7aa57931cc02aea5d/LICENSE) | -| icu | [Public Domain](https://github.com/unicode-org/icu/blob/a56dde820dc35665a66f2e9ee8ba58e75049b668/icu4c/LICENSE) | -| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5/LICENSE) | -| jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) | -| krb5 | [MIT](https://github.com/ClickHouse-Extras/krb5/blob/5149dea4e2be0f67707383d2682b897c14631374/src/lib/gssapi/LICENSE) | -| libc-headers | [LGPL](https://github.com/ClickHouse-Extras/libc-headers/blob/a720b7105a610acbd7427eea475a5b6810c151eb/LICENSE) | -| libcpuid | [BSD 2-clause](https://github.com/ClickHouse-Extras/libcpuid/blob/8db3b8d2d32d22437f063ce692a1b9bb15e42d18/COPYING) | -| libcxx | [Apache](https://github.com/ClickHouse-Extras/libcxx/blob/2fa892f69acbaa40f8a18c6484854a6183a34482/LICENSE.TXT) | -| libcxxabi | [Apache](https://github.com/ClickHouse-Extras/libcxxabi/blob/df8f1e727dbc9e2bedf2282096fa189dc3fe0076/LICENSE.TXT) | -| libdivide | [zLib](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | -| libfarmhash | 
[MIT](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libfarmhash/COPYING) | -| libgsasl | [LGPL](https://github.com/ClickHouse-Extras/libgsasl/blob/383ee28e82f69fa16ed43b48bd9c8ee5b313ab84/LICENSE) | -| libhdfs3 | [Apache](https://github.com/ClickHouse-Extras/libhdfs3/blob/095b9d48b400abb72d967cb0539af13b1e3d90cf/LICENSE.txt) | -| libmetrohash | [Apache](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) | -| libpq | [Unknown](https://github.com/ClickHouse-Extras/libpq/blob/e071ea570f8985aa00e34f5b9d50a3cfe666327e/COPYRIGHT) | -| libpqxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/libpqxx/blob/357608d11b7a1961c3fb7db2ef9a5dbb2e87da77/COPYING) | -| librdkafka | [MIT](https://github.com/ClickHouse-Extras/librdkafka/blob/b8554f1682062c85ba519eb54ef2f90e02b812cb/LICENSE.murmur2) | -| libunwind | [Apache](https://github.com/ClickHouse-Extras/libunwind/blob/6b816d2fba3991f8fd6aaec17d92f68947eab667/LICENSE.TXT) | -| libuv | [BSD](https://github.com/ClickHouse-Extras/libuv/blob/e2e9b7e9f978ce8a1367b5fe781d97d1ce9f94ab/LICENSE) | -| llvm | [Apache](https://github.com/ClickHouse-Extras/llvm/blob/e5751459412bce1391fb7a2e9bbc01e131bf72f1/llvm/LICENSE.TXT) | -| lz4 | [BSD](https://github.com/lz4/lz4/blob/f39b79fb02962a1cd880bbdecb6dffba4f754a11/LICENSE) | -| mariadb-connector-c | [LGPL](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/5f4034a3a6376416504f17186c55fe401c6d8e5e/COPYING.LIB) | -| miniselect | [Boost](https://github.com/danlark1/miniselect/blob/be0af6bd0b6eb044d1acc4f754b229972d99903a/LICENSE_1_0.txt) | -| msgpack-c | [Boost](https://github.com/msgpack/msgpack-c/blob/46684265d50b5d1b062d4c5c428ba08462844b1d/LICENSE_1_0.txt) | -| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) | -| NuRaft | [Apache](https://github.com/ClickHouse-Extras/NuRaft/blob/7ecb16844af6a9c283ad432d85ecc2e7d1544676/LICENSE) | -| openldap | [Unknown](https://github.com/ClickHouse-Extras/openldap/blob/0208811b6043ca06fda8631a5e473df1ec515ccb/LICENSE) | -| orc | [Apache](https://github.com/ClickHouse-Extras/orc/blob/0a936f6bbdb9303308973073f8623b5a8d82eae1/LICENSE) | -| poco | [Boost](https://github.com/ClickHouse-Extras/poco/blob/7351c4691b5d401f59e3959adfc5b4fa263b32da/LICENSE) | -| protobuf | [BSD 3-clause](https://github.com/ClickHouse-Extras/protobuf/blob/75601841d172c73ae6bf4ce8121f42b875cdbabd/LICENSE) | -| rapidjson | [MIT](https://github.com/ClickHouse-Extras/rapidjson/blob/c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa/bin/jsonschema/LICENSE) | -| re2 | [BSD 3-clause](https://github.com/google/re2/blob/13ebb377c6ad763ca61d12dd6f88b1126bd0b911/LICENSE) | -| replxx | [BSD 3-clause](https://github.com/ClickHouse-Extras/replxx/blob/c81be6c68b146f15f2096b7ef80e3f21fe27004c/LICENSE.md) | -| rocksdb | [BSD 3-clause](https://github.com/ClickHouse-Extras/rocksdb/blob/b6480c69bf3ab6e298e0d019a07fd4f69029b26a/LICENSE.leveldb) | -| s2geometry | [Apache](https://github.com/ClickHouse-Extras/s2geometry/blob/20ea540d81f4575a3fc0aea585aac611bcd03ede/LICENSE) | -| sentry-native | [MIT](https://github.com/ClickHouse-Extras/sentry-native/blob/94644e92f0a3ff14bd35ed902a8622a2d15f7be4/LICENSE) | -| simdjson | [Apache](https://github.com/simdjson/simdjson/blob/8df32cea3359cb30120795da6020b3b73da01d38/LICENSE) | -| snappy | [Public Domain](https://github.com/google/snappy/blob/3f194acb57e0487531c96b97af61dcbd025a78a3/COPYING) | -| sparsehash-c11 | [BSD 
3-clause](https://github.com/sparsehash/sparsehash-c11/blob/cf0bffaa456f23bc4174462a789b90f8b6f5f42f/LICENSE) | -| stats | [Apache](https://github.com/kthohr/stats/blob/b6dd459c10a88c7ea04693c007e9e35820c5d9ad/LICENSE) | -| thrift | [Apache](https://github.com/apache/thrift/blob/010ccf0a0c7023fea0f6bf4e4078ebdff7e61982/LICENSE) | -| unixodbc | [LGPL](https://github.com/ClickHouse-Extras/UnixODBC/blob/b0ad30f7f6289c12b76f04bfb9d466374bb32168/COPYING) | -| xz | [Public Domain](https://github.com/xz-mirror/xz/blob/869b9d1b4edd6df07f819d360d306251f8147353/COPYING) | -| zlib-ng | [zLib](https://github.com/ClickHouse-Extras/zlib-ng/blob/6a5e93b9007782115f7f7e5235dedc81c4f1facb/LICENSE.md) | -| zstd | [BSD](https://github.com/facebook/zstd/blob/a488ba114ec17ea1054b9057c26a046fc122b3b6/LICENSE) | - -The list of third-party libraries can be obtained by the following query: +ClickHouse utilizes third-party libraries for different purposes, e.g., to connect to other databases, to decode (encode) data during load (save) from (to) disk or to implement certain specialized SQL functions. To be independent of the available libraries in the target system, each third-party library is imported as a Git submodule into ClickHouse's source tree and compiled and linked with ClickHouse. A list of third-party libraries and their licenses can be obtained by the following query: ``` sql SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'; ``` +(Note that the listed libraries are the ones located in the `contrib/` directory of the ClickHouse repository. Depending on the build options, some of of the libraries may have not been compiled, and as a result, their functionality may not be available at runtime. + [Example](https://play.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) ## Adding new third-party libraries and maintaining patches in third-party libraries {#adding-third-party-libraries} diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 38d99430193..77ddae6a756 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -276,3 +276,23 @@ Testing will commence as soon as ClickHouse employees label your PR with a tag The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways. + +## Faster builds for development: Split build configuration {#split-build} + +ClickHouse is normally statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that for every change the entire binary needs to be re-linked, which is slow and inconvenient for development. 
As an alternative, you can instead build dynamically linked shared libraries and separate binaries `clickhouse-server`, `clickhouse-client` etc., allowing for faster incremental builds. To use it, add the following flags to your `cmake` invocation:
+```
+-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1
+```
+
+Note that the split build has several drawbacks:
+* There is no single `clickhouse` binary, and you have to run `clickhouse-server`, `clickhouse-client`, etc.
+* Risk of segfault if you run any of the programs while rebuilding the project.
+* You cannot run the integration tests since they only work with a single complete binary.
+* You can't easily copy the binaries elsewhere. Instead of moving a single binary you'll need to copy all binaries and libraries.
+
+If you are not interested in the functionality provided by third-party libraries, you can further speed up the build using the `cmake` options
+```
+-DENABLE_LIBRARIES=0 -DENABLE_EMBEDDED_COMPILER=0
+```
+
+In case of problems with any of the development options, you are on your own!
diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md
new file mode 100644
index 00000000000..7c975653f0e
--- /dev/null
+++ b/docs/en/engines/table-engines/integrations/nats.md
@@ -0,0 +1,163 @@
+---
+sidebar_position: 14
+sidebar_label: NATS
+---
+
+# NATS Engine {#nats-engine}
+
+This engine allows integrating ClickHouse with [NATS](https://nats.io/).
+
+`NATS` lets you:
+
+- Publish or subscribe to message subjects.
+- Process new messages as they become available.
+
+## Creating a Table {#table_engine-nats-creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
+    ...
+) ENGINE = NATS SETTINGS
+    nats_url = 'host:port',
+    nats_subjects = 'subject1,subject2,...',
+    nats_format = 'data_format'[,]
+    [nats_row_delimiter = 'delimiter_symbol',]
+    [nats_schema = '',]
+    [nats_num_consumers = N,]
+    [nats_queue_group = 'group_name',]
+    [nats_secure = false,]
+    [nats_max_reconnect = N,]
+    [nats_reconnect_wait = N,]
+    [nats_server_list = 'host1:port1,host2:port2,...',]
+    [nats_skip_broken_messages = N,]
+    [nats_max_block_size = N,]
+    [nats_flush_interval_ms = N,]
+    [nats_username = 'user',]
+    [nats_password = 'password',]
+    [nats_token = 'clickhouse']
+```
+
+Required parameters:
+
+- `nats_url` – host:port (for example, `localhost:4444`).
+- `nats_subjects` – List of subjects for the NATS table to subscribe/publish to. Supports wildcard subjects like `foo.*.bar` or `baz.>`.
+- `nats_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
+
+Optional parameters:
+
+- `nats_row_delimiter` – Delimiter character, which ends the message.
+- `nats_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
+- `nats_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient.
+- `nats_queue_group` – Name of the queue group of NATS subscribers. Default is the table name.
+- `nats_max_reconnect` – Maximum number of reconnection attempts per try to connect to NATS. Default: `5`.
+- `nats_reconnect_wait` – Amount of time in milliseconds to sleep between each reconnect attempt. Default: `5000`.
+- `nats_server_list` - Server list for connection. Can be specified to connect to a NATS cluster.
+- `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. If `nats_skip_broken_messages = N` then the engine skips *N* NATS messages that cannot be parsed (a message equals a row of data).
+- `nats_max_block_size` - Number of rows collected by poll(s) for flushing data from NATS.
+- `nats_flush_interval_ms` - Timeout for flushing data read from NATS.
+- `nats_username` - NATS username.
+- `nats_password` - NATS password.
+- `nats_token` - NATS auth token.
+
+SSL connection:
+
+For a secure connection use `nats_secure = 1`.
+The default behaviour of the used library is not to check whether the created TLS connection is sufficiently secure. Whether the certificate is expired, self-signed, missing or invalid: the connection is simply permitted. Stricter checking of certificates may be implemented in the future.
+
+Writing to a NATS table:
+
+If the table reads from only one subject, any insert will publish to the same subject.
+However, if the table reads from multiple subjects, you need to specify which subject to publish to.
+That is why, whenever inserting into a table with multiple subjects, the setting `stream_like_engine_insert_queue` is needed.
+You can select one of the subjects the table reads from and publish your data there. For example:
+
+``` sql
+  CREATE TABLE queue (
+    key UInt64,
+    value UInt64
+  ) ENGINE = NATS
+    SETTINGS nats_url = 'localhost:4444',
+             nats_subjects = 'subject1,subject2',
+             nats_format = 'JSONEachRow';
+
+  INSERT INTO queue
+  SETTINGS stream_like_engine_insert_queue = 'subject2'
+  VALUES (1, 1);
+```
+
+Format settings can also be added along with the NATS-related settings.
+
+Example:
+
+``` sql
+  CREATE TABLE queue (
+    key UInt64,
+    value UInt64,
+    date DateTime
+  ) ENGINE = NATS
+    SETTINGS nats_url = 'localhost:4444',
+             nats_subjects = 'subject1',
+             nats_format = 'JSONEachRow',
+             date_time_input_format = 'best_effort';
+```
+
+The NATS server configuration can be added using the ClickHouse config file.
+More specifically, you can add the user, password and token for the NATS engine:
+
+``` xml
+<nats>
+    <user>click</user>
+    <password>house</password>
+    <token>clickhouse</token>
+</nats>
+```
+
+## Description {#description}
+
+`SELECT` is not particularly useful for reading messages (except for debugging), because each message can be read only once. It is more practical to create real-time threads using [materialized views](../../../sql-reference/statements/create/view.md). To do this:
+
+1. Use the engine to create a NATS consumer and consider it a data stream.
+2. Create a table with the desired structure.
+3. Create a materialized view that converts data from the engine and puts it into a previously created table.
+
+When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from NATS and convert them to the required format using `SELECT`.
+One NATS table can have as many materialized views as you like: they do not read data from the table directly, but receive new records (in blocks). This way you can write to several tables with different levels of detail (with grouping - aggregation and without).
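+For a quick check of what is arriving on the subscribed subjects, a plain `SELECT` from the NATS table can be used for debugging only, since each message is consumed only once (depending on the server version, direct `SELECT`s from stream-like engines may also need to be explicitly allowed in settings). A minimal sketch, assuming the `queue` table from the examples above; the `_subject` virtual column described below shows which subject a message came from:
+
+``` sql
+  SELECT _subject, key, value
+  FROM queue
+  LIMIT 10;
+```
+
+A complete materialized view pipeline is shown in the following example.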
+ +Example: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64 + ) ENGINE = NATS + SETTINGS nats_url = 'localhost:4444', + nats_subjects = 'subject1', + nats_format = 'JSONEachRow', + date_time_input_format = 'best_effort'; + + CREATE TABLE daily (key UInt64, value UInt64) + ENGINE = MergeTree() ORDER BY key; + + CREATE MATERIALIZED VIEW consumer TO daily + AS SELECT key, value FROM queue; + + SELECT key, value FROM daily ORDER BY key; +``` + +To stop receiving streams data or to change the conversion logic, detach the materialized view: + +``` sql + DETACH TABLE consumer; + ATTACH TABLE consumer; +``` + +If you want to change the target table by using `ALTER`, we recommend disabling the material view to avoid discrepancies between the target table and the data from the view. + +## Virtual Columns {#virtual-columns} + +- `_subject` - NATS message subject. + +[Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/nats/) diff --git a/docs/en/getting-started/example-datasets/metrica.md b/docs/en/getting-started/example-datasets/metrica.md index da0286d8c05..300bbe58d3f 100644 --- a/docs/en/getting-started/example-datasets/metrica.md +++ b/docs/en/getting-started/example-datasets/metrica.md @@ -87,7 +87,7 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" The hits and visits dataset is used in the ClickHouse test routines, this is one of the queries from the test suite. The rest -of the tests are refernced in the *Next Steps* section at the +of the tests are referenced in the *Next Steps* section at the end of this page. ```sql diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 8505b040fa3..a5e6495d8d8 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -127,22 +127,36 @@ After that downloaded archives should be unpacked and installed with installatio LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) export LATEST_VERSION -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION-amd64.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION-amd64.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION-amd64.tgz" -tar -xzvf "clickhouse-common-static-$LATEST_VERSION-amd64.tgz" +case $(uname -m) in + x86_64) ARCH=amd64 ;; + aarch64) ARCH=arm64 ;; + *) echo "Unknown architecture $(uname -m)"; exit 1 ;; +esac + +for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client +do + curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION-${ARCH}.tgz" \ + || curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz" +done + +exit 0 + +tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-amd64.tgz" +tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-server-$LATEST_VERSION-amd64.tgz" +tar -xzvf 
"clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" sudo /etc/init.d/clickhouse-server start -tar -xzvf "clickhouse-client-$LATEST_VERSION-amd64.tgz" +tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" ``` diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 00fa382fd4d..5d8ed9cdacd 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -11,68 +11,69 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table. The supported formats are: | Format | Input | Output | -|-------------------------------------------------------------------------------------------|-------|--------| -| [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | -| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | -| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | -| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | -| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | -| [Template](#format-template) | ✔ | ✔ | -| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | -| [CSV](#csv) | ✔ | ✔ | -| [CSVWithNames](#csvwithnames) | ✔ | ✔ | -| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | -| [CustomSeparated](#format-customseparated) | ✔ | ✔ | -| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | -| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | -| [Values](#data-format-values) | ✔ | ✔ | -| [Vertical](#vertical) | ✗ | ✔ | -| [JSON](#json) | ✗ | ✔ | -| [JSONAsString](#jsonasstring) | ✔ | ✗ | -| [JSONStrings](#jsonstrings) | ✗ | ✔ | -| [JSONColumns](#jsoncolumns) | ✔ | ✔ | -| [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✗ | ✔ | -| [JSONCompact](#jsoncompact) | ✗ | ✔ | -| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | -| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | -| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | -| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | -| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | -| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | -| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | -| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | -| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | -| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | -| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | -| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | -| [TSKV](#tskv) | ✔ | ✔ | -| [Pretty](#pretty) | ✗ | ✔ | -| [PrettyCompact](#prettycompact) | ✗ | ✔ | -| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | -| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | -| [PrettySpace](#prettyspace) | ✗ | ✔ | -| [Prometheus](#prometheus) | ✗ | ✔ | -| [Protobuf](#protobuf) | ✔ | ✔ | -| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | -| [Avro](#data-format-avro) | ✔ | ✔ | -| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | -| [Parquet](#data-format-parquet) | ✔ | ✔ | -| [Arrow](#data-format-arrow) | ✔ | ✔ | -| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | -| [ORC](#data-format-orc) | ✔ | ✔ | -| [RowBinary](#rowbinary) | ✔ | ✔ | -| 
[RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [Native](#native) | ✔ | ✔ | -| [Null](#null) | ✗ | ✔ | -| [XML](#xml) | ✗ | ✔ | -| [CapnProto](#capnproto) | ✔ | ✔ | -| [LineAsString](#lineasstring) | ✔ | ✗ | -| [Regexp](#data-format-regexp) | ✔ | ✗ | -| [RawBLOB](#rawblob) | ✔ | ✔ | -| [MsgPack](#msgpack) | ✔ | ✔ | -| [MySQLDump](#mysqldump) | ✔ | ✗ | +|-------------------------------------------------------------------------------------------|------|--------| +| [TabSeparated](#tabseparated) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | +| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | +| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | +| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | +| [Template](#format-template) | ✔ | ✔ | +| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | +| [CSV](#csv) | ✔ | ✔ | +| [CSVWithNames](#csvwithnames) | ✔ | ✔ | +| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | +| [CustomSeparated](#format-customseparated) | ✔ | ✔ | +| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | +| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | +| [SQLInsert](#sqlinsert) | ✗ | ✔ | +| [Values](#data-format-values) | ✔ | ✔ | +| [Vertical](#vertical) | ✗ | ✔ | +| [JSON](#json) | ✗ | ✔ | +| [JSONAsString](#jsonasstring) | ✔ | ✗ | +| [JSONStrings](#jsonstrings) | ✗ | ✔ | +| [JSONColumns](#jsoncolumns) | ✔ | ✔ | +| [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✗ | ✔ | +| [JSONCompact](#jsoncompact) | ✗ | ✔ | +| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | +| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | +| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | +| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | +| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | +| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | +| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | +| [TSKV](#tskv) | ✔ | ✔ | +| [Pretty](#pretty) | ✗ | ✔ | +| [PrettyCompact](#prettycompact) | ✗ | ✔ | +| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | +| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | +| [PrettySpace](#prettyspace) | ✗ | ✔ | +| [Prometheus](#prometheus) | ✗ | ✔ | +| [Protobuf](#protobuf) | ✔ | ✔ | +| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [Avro](#data-format-avro) | ✔ | ✔ | +| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | +| [Parquet](#data-format-parquet) | ✔ | ✔ | +| [Arrow](#data-format-arrow) | ✔ | ✔ | +| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | +| [ORC](#data-format-orc) | ✔ | ✔ | +| [RowBinary](#rowbinary) | ✔ | ✔ | +| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [Native](#native) | ✔ | ✔ | +| [Null](#null) | ✗ | ✔ | +| [XML](#xml) | ✗ | ✔ | +| [CapnProto](#capnproto) | ✔ | ✔ | +| [LineAsString](#lineasstring) | ✔ | ✗ | +| 
[Regexp](#data-format-regexp) | ✔ | ✗ | +| [RawBLOB](#rawblob) | ✔ | ✔ | +| [MsgPack](#msgpack) | ✔ | ✔ | +| [MySQLDump](#mysqldump) | ✔ | ✗ | You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section. @@ -468,6 +469,34 @@ Also prints the header row with column names, similar to [TabSeparatedWithNames] Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). +## SQLInsert {#sqlinsert} + +Outputs data as a sequence of `INSERT INTO table (columns...) VALUES (...), (...) ...;` statements. + +Example: + +```sql +SELECT number AS x, number + 1 AS y, 'Hello' AS z FROM numbers(10) FORMAT SQLInsert SETTINGS output_format_sql_insert_max_batch_size = 2 +``` + +```sql +INSERT INTO table (x, y, z) VALUES (0, 1, 'Hello'), (1, 2, 'Hello'); +INSERT INTO table (x, y, z) VALUES (2, 3, 'Hello'), (3, 4, 'Hello'); +INSERT INTO table (x, y, z) VALUES (4, 5, 'Hello'), (5, 6, 'Hello'); +INSERT INTO table (x, y, z) VALUES (6, 7, 'Hello'), (7, 8, 'Hello'); +INSERT INTO table (x, y, z) VALUES (8, 9, 'Hello'), (9, 10, 'Hello'); +``` + +To read data output by this format ypu can use [MySQLDump](#mysqldump) input format. + +### SQLInsert format settings {#sqlinsert-format-settings} + +- [output_format_sql_insert_max_batch_size](../operations/settings/settings.md#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. Default value - `65505`. +- [output_format_sql_insert_table_name](../operations/settings/settings.md#output_format_sql_insert_table_name) - The name of table in the output INSERT query. Default value - `'table'`. +- [output_format_sql_insert_include_column_names](../operations/settings/settings.md#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. +- [output_format_sql_insert_use_replace](../operations/settings/settings.md#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. +- [output_format_sql_insert_quote_names](../operations/settings/settings.md#output_format_sql_insert_quote_names) - Quote column names with "\`" characters . Default value - `true`. + ## JSON {#json} Outputs data in JSON format. Besides data tables, it also outputs column names and types, along with some additional information: the total number of output rows, and the number of rows that could have been output if there weren’t a LIMIT. Example: diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 705b9ef42c0..8067b18cc35 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -51,7 +51,6 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn’t don - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) - [Klickhouse](https://github.com/Protryon/klickhouse) - R - - [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r) - [RClickHouse](https://github.com/IMSMWU/RClickHouse) - Java - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 85265448c03..75c2aa57b32 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4637,3 +4637,35 @@ Possible values: - 1 — Enabled. 
Default value: 1.
+
+## SQLInsert format settings {#sqlinsert-format-settings}
+
+### output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size}
+
+The maximum number of rows in one INSERT statement.
+
+Default value: `65505`.
+
+### output_format_sql_insert_table_name {#output_format_sql_insert_table_name}
+
+The name of the table that will be used in the output INSERT statement.
+
+Default value: `'table'`.
+
+### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names}
+
+Include column names in the INSERT statement.
+
+Default value: `true`.
+
+### output_format_sql_insert_use_replace {#output_format_sql_insert_use_replace}
+
+Use the REPLACE keyword instead of INSERT.
+
+Default value: `false`.
+
+### output_format_sql_insert_quote_names {#output_format_sql_insert_quote_names}
+
+Quote column names with "`" characters.
+
+Default value: `true`.
diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md
index f364bc85088..5325311a9e6 100644
--- a/docs/en/operations/tips.md
+++ b/docs/en/operations/tips.md
@@ -128,7 +128,8 @@ You should never use manually written scripts to transfer data between different
If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters.

-You can run ClickHouse Keeper on the same server as ClickHouse, but do not run ZooKeeper on the same servers as ClickHouse. Because ZooKeeper is very sensitive for latency and ClickHouse may utilize all available system resources.
+You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with a low ingestion rate.
+For production environments we suggest using separate servers for ClickHouse and ZooKeeper/Keeper, or placing the ClickHouse files and Keeper files on separate disks, because ZooKeeper/Keeper is very sensitive to disk latency and ClickHouse may utilize all available system resources.

You can have ZooKeeper observers in an ensemble but ClickHouse servers should not interact with observers.
diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md
index 7d9a727a70d..0a452b6c4d2 100644
--- a/docs/en/sql-reference/statements/create/function.md
+++ b/docs/en/sql-reference/statements/create/function.md
@@ -10,7 +10,7 @@ Creates a user defined function from a lambda expression. The expression must co
**Syntax**

```sql
-CREATE FUNCTION name AS (parameter0, ...) -> expression
+CREATE FUNCTION name [ON CLUSTER cluster] AS (parameter0, ...) -> expression
```

A function can have an arbitrary number of parameters.
diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md
index 49862cbfc02..9621cd4944f 100644
--- a/docs/en/sql-reference/statements/drop.md
+++ b/docs/en/sql-reference/statements/drop.md
@@ -105,7 +105,7 @@ System functions can not be dropped.
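+With this change, a user defined function can be created and dropped across a whole cluster in one statement. A minimal sketch, assuming a cluster named `mycluster` is defined in the server configuration and using a hypothetical function name:
+
+``` sql
+CREATE FUNCTION linear_equation ON CLUSTER mycluster AS (x, k, b) -> k*x + b;
+DROP FUNCTION IF EXISTS linear_equation ON CLUSTER mycluster;
+```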
**Syntax** ``` sql -DROP FUNCTION [IF EXISTS] function_name +DROP FUNCTION [IF EXISTS] function_name [on CLUSTER cluster] ``` **Example** diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 09e91530473..9f3eabc73ae 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -124,22 +124,36 @@ sudo yum install clickhouse-server clickhouse-client LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) export LATEST_VERSION -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" -tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" +case $(uname -m) in + x86_64) ARCH=amd64 ;; + aarch64) ARCH=arm64 ;; + *) echo "Unknown architecture $(uname -m)"; exit 1 ;; +esac + +for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client +do + curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION-${ARCH}.tgz" \ + || curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz" +done + +exit 0 + +tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" sudo /etc/init.d/clickhouse-server start -tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" ``` diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index 702b53622da..ab2c9419b7f 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -45,7 +45,6 @@ sidebar_label: "Клиентские библиотеки от сторонни - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) - [Klickhouse](https://github.com/Protryon/klickhouse) - R - - [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r) - [RClickhouse](https://github.com/IMSMWU/RClickhouse) - Java - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) diff --git a/docs/zh/development/continuous-integration.md b/docs/zh/development/continuous-integration.md index 5bebb3aec2a..836c4a35433 100644 --- a/docs/zh/development/continuous-integration.md +++ b/docs/zh/development/continuous-integration.md @@ -86,13 +86,10 @@ git push - **Build log**: 链接到构建和文件复制日志, 当构建失败时很有用. - **Build time**. - **Artifacts**: 构建结果文件 (`XXX`是服务器版本, 比如`20.8.1.4344`). 
- - `clickhouse-client_XXX_all.deb` + - `clickhouse-client_XXX_amd64.deb` -` clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb` - `clickhouse-common-staticXXX_amd64.deb` - - `clickhouse-server_XXX_all.deb` - - `clickhouse-test_XXX_all.deb` - - `clickhouse_XXX_amd64.buildinfo` - - `clickhouse_XXX_amd64.changes` + - `clickhouse-server_XXX_amd64.deb` - `clickhouse`: Main built binary. - `clickhouse-odbc-bridge` - `unit_tests_dbms`: 带有 ClickHouse 单元测试的 GoogleTest 二进制文件. diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md index a8b803547a8..6a0b47607f5 100644 --- a/docs/zh/getting-started/install.md +++ b/docs/zh/getting-started/install.md @@ -121,22 +121,36 @@ sudo yum install clickhouse-server clickhouse-client LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) export LATEST_VERSION -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" -curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" -tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" +case $(uname -m) in + x86_64) ARCH=amd64 ;; + aarch64) ARCH=arm64 ;; + *) echo "Unknown architecture $(uname -m)"; exit 1 ;; +esac + +for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client +do + curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION-${ARCH}.tgz" \ + || curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz" +done + +exit 0 + +tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" -tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" sudo /etc/init.d/clickhouse-server start -tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" +tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \ + || tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" ``` diff --git a/docs/zh/interfaces/third-party/client-libraries.md b/docs/zh/interfaces/third-party/client-libraries.md index 52f7d70c0bf..8ed482eee73 100644 --- a/docs/zh/interfaces/third-party/client-libraries.md +++ b/docs/zh/interfaces/third-party/client-libraries.md @@ -46,7 +46,6 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试 - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) - [Klickhouse](https://github.com/Protryon/klickhouse) - R - - [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r) - [RClickHouse](https://github.com/IMSMWU/RClickHouse) - Java - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) diff --git a/packages/clickhouse-client.yaml b/packages/clickhouse-client.yaml index 5e53090b581..642d66f5475 100644 --- 
a/packages/clickhouse-client.yaml +++ b/packages/clickhouse-client.yaml @@ -1,7 +1,7 @@ # package sources should be placed in ${PWD}/root # nfpm should run from the same directory with a config name: "clickhouse-client" -arch: "all" +arch: "${DEB_ARCH}" # amd64, arm64 platform: "linux" version: "${CLICKHOUSE_VERSION_STRING}" vendor: "ClickHouse Inc." diff --git a/packages/clickhouse-common-static.yaml b/packages/clickhouse-common-static.yaml index 269d4318e5e..527b6a24703 100644 --- a/packages/clickhouse-common-static.yaml +++ b/packages/clickhouse-common-static.yaml @@ -29,12 +29,14 @@ description: | contents: - src: root/usr/bin/clickhouse dst: /usr/bin/clickhouse -- src: root/usr/bin/clickhouse-odbc-bridge - dst: /usr/bin/clickhouse-odbc-bridge -- src: root/usr/bin/clickhouse-library-bridge - dst: /usr/bin/clickhouse-library-bridge +- src: root/usr/bin/clickhouse-diagnostics + dst: /usr/bin/clickhouse-diagnostics - src: root/usr/bin/clickhouse-extract-from-config dst: /usr/bin/clickhouse-extract-from-config +- src: root/usr/bin/clickhouse-library-bridge + dst: /usr/bin/clickhouse-library-bridge +- src: root/usr/bin/clickhouse-odbc-bridge + dst: /usr/bin/clickhouse-odbc-bridge - src: root/usr/share/bash-completion/completions dst: /usr/share/bash-completion/completions # docs diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index ed56eb27e54..28995689754 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -1,7 +1,7 @@ # package sources should be placed in ${PWD}/root # nfpm should run from the same directory with a config name: "clickhouse-server" -arch: "all" +arch: "${DEB_ARCH}" # amd64, arm64 platform: "linux" version: "${CLICKHOUSE_VERSION_STRING}" vendor: "ClickHouse Inc." diff --git a/tools/clickhouse-diagnostics/.gitignore b/programs/diagnostics/.gitignore similarity index 100% rename from tools/clickhouse-diagnostics/.gitignore rename to programs/diagnostics/.gitignore diff --git a/tools/clickhouse-diagnostics/CONTRIBUTION.md b/programs/diagnostics/CONTRIBUTION.md similarity index 100% rename from tools/clickhouse-diagnostics/CONTRIBUTION.md rename to programs/diagnostics/CONTRIBUTION.md diff --git a/tools/clickhouse-diagnostics/Makefile b/programs/diagnostics/Makefile similarity index 80% rename from tools/clickhouse-diagnostics/Makefile rename to programs/diagnostics/Makefile index 10d77f6c44d..2e85002b871 100644 --- a/tools/clickhouse-diagnostics/Makefile +++ b/programs/diagnostics/Makefile @@ -5,7 +5,9 @@ BUILD_DIR=dist TIMESTAMP := $(shell date +%Y%m%d-%H%M) COMMIT := $(shell git rev-parse --short HEAD) -DEVLDFLAGS = -ldflags "-X github.com/ClickHouse/clickhouse-diagnostics/cmd.Version=v.dev-${TIMESTAMP} -X github.com/ClickHouse/clickhouse-diagnostics/cmd.Commit=${COMMIT}" +MODULE := github.com/ClickHouse/ClickHouse/programs/diagnostics +VERSION := v.dev-${TIMESTAMP} +DEVLDFLAGS = -ldflags "-X ${MODULE}/cmd.Version=${VERSION} -X ${MODULE}/cmd.Commit=${COMMIT}" # override with env variable to test other versions e.g. 21.11.10.1 CLICKHOUSE_VERSION ?= latest @@ -26,7 +28,7 @@ release: ## Release is delegated to goreleaser ## Build: build: ## Build a binary for local use # timestamped version - $(GOCMD) build ${DEVLDFLAGS} -o $(BINARY_NAME) . 
+ $(GOCMD) build ${DEVLDFLAGS} -o $(BINARY_NAME) ./cmd/clickhouse-diagnostics clean: ## Remove build related file rm ${BINARY_NAME} @@ -38,6 +40,9 @@ vendor: ## Copy of all packages needed to support builds and tests in the vendor test: ## Run the tests of the project CLICKHOUSE_VERSION=$(CLICKHOUSE_VERSION) $(GOTEST) -v -race `go list ./... | grep -v ./internal/platform/test` +test-no-docker: ## Don't run tests depending on dockerd + CLICKHOUSE_VERSION=$(CLICKHOUSE_VERSION) $(GOTEST) -v -race -tags no_docker `go list ./... | grep -v ./internal/platform/test` + lint-go: ## Use golintci-lint docker run --rm -v $(shell pwd):/app -w /app golangci/golangci-lint:latest-alpine golangci-lint run diff --git a/tools/clickhouse-diagnostics/README.md b/programs/diagnostics/README.md similarity index 97% rename from tools/clickhouse-diagnostics/README.md rename to programs/diagnostics/README.md index 89de0afdf2c..f800bb0648e 100644 --- a/tools/clickhouse-diagnostics/README.md +++ b/programs/diagnostics/README.md @@ -9,7 +9,7 @@ This tool provides a means of obtaining a diagnostic bundle from a ClickHouse in - **No local dependencies** to run. We compile to a platform-independent binary, hence Go. - **Minimize resource overhead**. Improvements always welcome. - **Extendable framework**. At its core, the tool provides collectors and outputs. Collectors are independent and are responsible for collecting a specific dataset e.g. system configuration. Outputs produce the diagnostic bundle in a specific format. It should be trivial to add both for contributors. See [Collectors](#collectors) and [Outputs](#outputs) for more details. -- **Convertable output formats**. Outputs produce diagnostic bundles in different formats e.g. archive, simple report etc. Where possible, it should be possible to convert between these formats. For example, an administrator may provide a bundle as an archive to their support provider who in turn wishes to visualise this as a report or even in ClickHouse itself... +- **Convertible output formats**. Outputs produce diagnostic bundles in different formats e.g. archive, simple report etc. Where possible, it should be possible to convert between these formats. For example, an administrator may provide a bundle as an archive to their support provider who in turn wishes to visualise this as a report or even in ClickHouse itself... - **Something is better than nothing**. Collectors execute independently. We never fail a collection because one fails - preferring to warn the user only. There are good reasons for a collector failure e.g. insufficient permissions or missing data. - **Execute anywhere** - Ideally, this tool is executed on a ClickHouse host. Some collectors e.g. configuration file collection or system information, rely on this. However, collectors will obtain as much information remotely from the database as possible if executed remotely from the cluster - warning where collection fails. **We do currently require ClickHouse to be running, connecting over the native port**. @@ -25,7 +25,7 @@ The `collect` command allows the collection of a diagnostic bundle. In its simpl clickhouse-diagnostics collect ``` -This will use the default collectors and the simple output. This output produces a timestamped archive bundle in `gz` format in a sub folder named after the host. This folder name can be controlled via the parameter `--id` or configured directly for the simple output parameter `output.simple.folder` (this allows a specific diretory to be specified). 
+This will use the default collectors and the simple output. This output produces a timestamped archive bundle in `gz` format in a sub folder named after the host. This folder name can be controlled via the parameter `--id` or configured directly for the simple output parameter `output.simple.folder` (this allows a specific directory to be specified). Collectors, Outputs and ClickHouse connection credentials can be specified as shown below: @@ -71,7 +71,7 @@ We currently support the following collectors. A `*` indicates this collector is - `config*` - Collects the ClickHouse configuration from the local filesystem. A best effort is made using process information if ClickHouse is not installed locally. `include_path` are also considered. - `db_logs*` - Collects the ClickHouse logs directly from the database. - `logs*` - Collects the ClickHouse logs directly from the database. -- `summary*` - Collects summary statistics on the database based on a set of known useful queries. This represents the easiest collector to extend - contributions are welcome to this set which can be found [here](https://github.com/ClickHouse/clickhouse-diagnostics/blob/main/internal/collectors/clickhouse/queries.json). +- `summary*` - Collects summary statistics on the database based on a set of known useful queries. This represents the easiest collector to extend - contributions are welcome to this set which can be found [here](https://github.com/ClickHouse/ClickHouse/blob/master/programs/diagnostics/internal/collectors/clickhouse/queries.json). - `file` - Collects files based on glob patterns. Does not collect directories. To preview files which will be collected try, `clickhouse-diagnostics collect --collectors=file --collector.file.file_pattern= --output report` - `command` - Collects the output of a user specified command. To preview output, `clickhouse-diagnostics collect --collectors=command --collector.command.command="" --output report` - `zookeeper_db` - Collects information about zookeeper using the `system.zookeeper` table, recursively iterating the zookeeper tree/table. Note: changing the default parameter values can cause extremely high load to be placed on the database. Use with caution. By default, uses the glob `/clickhouse/{task_queue}/**` to match zookeeper paths and iterates to a max depth of 8. 
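As a rough illustration of what the `zookeeper_db` collector reads, the same information can be inspected manually through the `system.zookeeper` table. A minimal sketch (the `/clickhouse/task_queue` path matches the collector's default glob root and must exist in your ZooKeeper/Keeper tree):

``` sql
-- system.zookeeper requires a path filter in the WHERE clause
SELECT name, value, path
FROM system.zookeeper
WHERE path = '/clickhouse/task_queue'
```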
diff --git a/programs/diagnostics/cmd/clickhouse-diagnostics/main.go b/programs/diagnostics/cmd/clickhouse-diagnostics/main.go new file mode 100644 index 00000000000..0a849a9f520 --- /dev/null +++ b/programs/diagnostics/cmd/clickhouse-diagnostics/main.go @@ -0,0 +1,9 @@ +package main + +import ( + "github.com/ClickHouse/ClickHouse/programs/diagnostics/cmd" +) + +func main() { + cmd.Execute() +} diff --git a/tools/clickhouse-diagnostics/cmd/collect.go b/programs/diagnostics/cmd/collect.go similarity index 88% rename from tools/clickhouse-diagnostics/cmd/collect.go rename to programs/diagnostics/cmd/collect.go index e2228407541..503d8e41fb7 100644 --- a/tools/clickhouse-diagnostics/cmd/collect.go +++ b/programs/diagnostics/cmd/collect.go @@ -2,21 +2,22 @@ package cmd import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/cmd/params" - "github.com/ClickHouse/clickhouse-diagnostics/internal" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - _ "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/clickhouse" - _ "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/system" - "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs" - _ "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs/file" - _ "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs/terminal" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" + "os" + "strings" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/cmd/params" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + _ "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/clickhouse" + _ "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/system" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs" + _ "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs/file" + _ "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs/terminal" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" "github.com/rs/zerolog/log" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/spf13/viper" - "os" - "strings" ) var id string diff --git a/tools/clickhouse-diagnostics/cmd/convert.go b/programs/diagnostics/cmd/convert.go similarity index 100% rename from tools/clickhouse-diagnostics/cmd/convert.go rename to programs/diagnostics/cmd/convert.go diff --git a/tools/clickhouse-diagnostics/cmd/help.go b/programs/diagnostics/cmd/help.go similarity index 91% rename from tools/clickhouse-diagnostics/cmd/help.go rename to programs/diagnostics/cmd/help.go index ba15fb8e1b1..750576dda25 100644 --- a/tools/clickhouse-diagnostics/cmd/help.go +++ b/programs/diagnostics/cmd/help.go @@ -2,13 +2,14 @@ package cmd import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/cmd/params" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" + "os" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/cmd/params" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" "github.com/rs/zerolog/log" "github.com/spf13/cobra" - "os" ) var cHelp = 
params.StringOptionsVar{ diff --git a/tools/clickhouse-diagnostics/cmd/params/params.go b/programs/diagnostics/cmd/params/params.go similarity index 97% rename from tools/clickhouse-diagnostics/cmd/params/params.go rename to programs/diagnostics/cmd/params/params.go index 5d2bdc5fbe8..c4464aab5d2 100644 --- a/tools/clickhouse-diagnostics/cmd/params/params.go +++ b/programs/diagnostics/cmd/params/params.go @@ -4,10 +4,11 @@ import ( "bytes" "encoding/csv" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - "github.com/spf13/cobra" "strings" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/spf13/cobra" ) type cliParamType uint8 diff --git a/tools/clickhouse-diagnostics/cmd/params/params_test.go b/programs/diagnostics/cmd/params/params_test.go similarity index 98% rename from tools/clickhouse-diagnostics/cmd/params/params_test.go rename to programs/diagnostics/cmd/params/params_test.go index 80a8b039d36..7671506ba59 100644 --- a/tools/clickhouse-diagnostics/cmd/params/params_test.go +++ b/programs/diagnostics/cmd/params/params_test.go @@ -1,13 +1,14 @@ package params_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/cmd/params" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/spf13/cobra" - "github.com/stretchr/testify/require" "os" "sort" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/cmd/params" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/spf13/cobra" + "github.com/stretchr/testify/require" ) var conf = map[string]config.Configuration{ diff --git a/tools/clickhouse-diagnostics/cmd/root.go b/programs/diagnostics/cmd/root.go similarity index 98% rename from tools/clickhouse-diagnostics/cmd/root.go rename to programs/diagnostics/cmd/root.go index b6d860df76a..4cf329d5438 100644 --- a/tools/clickhouse-diagnostics/cmd/root.go +++ b/programs/diagnostics/cmd/root.go @@ -2,17 +2,18 @@ package cmd import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - "github.com/pkg/errors" - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - "github.com/spf13/viper" "net/http" _ "net/http/pprof" "os" "strings" "time" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/pkg/errors" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" + "github.com/spf13/cobra" + "github.com/spf13/viper" ) func enableDebug() { diff --git a/tools/clickhouse-diagnostics/cmd/version.go b/programs/diagnostics/cmd/version.go similarity index 100% rename from tools/clickhouse-diagnostics/cmd/version.go rename to programs/diagnostics/cmd/version.go diff --git a/tools/clickhouse-diagnostics/go.mod b/programs/diagnostics/go.mod similarity index 98% rename from tools/clickhouse-diagnostics/go.mod rename to programs/diagnostics/go.mod index 1672cb93817..19fc2ec8202 100644 --- a/tools/clickhouse-diagnostics/go.mod +++ b/programs/diagnostics/go.mod @@ -1,4 +1,4 @@ -module github.com/ClickHouse/clickhouse-diagnostics +module github.com/ClickHouse/ClickHouse/programs/diagnostics go 1.17 diff --git a/tools/clickhouse-diagnostics/go.sum b/programs/diagnostics/go.sum similarity index 100% rename from tools/clickhouse-diagnostics/go.sum rename to programs/diagnostics/go.sum diff --git 
a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/config.go b/programs/diagnostics/internal/collectors/clickhouse/config.go similarity index 88% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/config.go rename to programs/diagnostics/internal/collectors/clickhouse/config.go index f6f2d441ed2..92368bce6f3 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/config.go +++ b/programs/diagnostics/internal/collectors/clickhouse/config.go @@ -2,13 +2,14 @@ package clickhouse import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - "github.com/pkg/errors" "path/filepath" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/pkg/errors" ) type ConfigCollector struct { diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/config_test.go b/programs/diagnostics/internal/collectors/clickhouse/config_test.go similarity index 93% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/config_test.go rename to programs/diagnostics/internal/collectors/clickhouse/config_test.go index 67205fb9384..355cbb65620 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/config_test.go +++ b/programs/diagnostics/internal/collectors/clickhouse/config_test.go @@ -3,15 +3,16 @@ package clickhouse_test import ( "encoding/xml" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/clickhouse" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/stretchr/testify/require" "io" "os" "path" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/clickhouse" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/stretchr/testify/require" ) func TestConfigConfiguration(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/db_logs.go b/programs/diagnostics/internal/collectors/clickhouse/db_logs.go similarity index 88% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/db_logs.go rename to programs/diagnostics/internal/collectors/clickhouse/db_logs.go index 23a47c33c7f..3253f504c1b 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/db_logs.go +++ b/programs/diagnostics/internal/collectors/clickhouse/db_logs.go @@ -1,10 +1,10 @@ package clickhouse import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - 
"github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/pkg/errors" ) diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/db_logs_test.go b/programs/diagnostics/internal/collectors/clickhouse/db_logs_test.go similarity index 98% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/db_logs_test.go rename to programs/diagnostics/internal/collectors/clickhouse/db_logs_test.go index 9c403de281a..3fc585f3352 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/db_logs_test.go +++ b/programs/diagnostics/internal/collectors/clickhouse/db_logs_test.go @@ -1,12 +1,13 @@ package clickhouse_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/clickhouse" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/clickhouse" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/stretchr/testify/require" ) func TestDbLogsConfiguration(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/logs.go b/programs/diagnostics/internal/collectors/clickhouse/logs.go similarity index 90% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/logs.go rename to programs/diagnostics/internal/collectors/clickhouse/logs.go index 8d01c858947..8436a392c47 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/logs.go +++ b/programs/diagnostics/internal/collectors/clickhouse/logs.go @@ -2,12 +2,13 @@ package clickhouse import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" "path/filepath" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" ) // This collector collects logs diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/logs_test.go b/programs/diagnostics/internal/collectors/clickhouse/logs_test.go similarity index 92% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/logs_test.go rename to programs/diagnostics/internal/collectors/clickhouse/logs_test.go index dd94997c465..5f0be734445 100644 --- 
a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/logs_test.go +++ b/programs/diagnostics/internal/collectors/clickhouse/logs_test.go @@ -2,15 +2,16 @@ package clickhouse_test import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/clickhouse" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/stretchr/testify/require" "os" "path" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/clickhouse" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/stretchr/testify/require" ) func TestLogsConfiguration(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/queries.json b/programs/diagnostics/internal/collectors/clickhouse/queries.json similarity index 100% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/queries.json rename to programs/diagnostics/internal/collectors/clickhouse/queries.json diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/summary.go b/programs/diagnostics/internal/collectors/clickhouse/summary.go similarity index 92% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/summary.go rename to programs/diagnostics/internal/collectors/clickhouse/summary.go index 603fc954642..0b6dd3aff20 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/summary.go +++ b/programs/diagnostics/internal/collectors/clickhouse/summary.go @@ -4,14 +4,15 @@ import ( "bytes" _ "embed" "encoding/json" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/Masterminds/semver" - "github.com/pkg/errors" "strings" "text/template" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/Masterminds/semver" + "github.com/pkg/errors" ) // This collector collects the system db from database diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/summary_test.go b/programs/diagnostics/internal/collectors/clickhouse/summary_test.go similarity index 93% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/summary_test.go rename to programs/diagnostics/internal/collectors/clickhouse/summary_test.go index 7c15cd58a1e..92945d987ed 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/summary_test.go +++ b/programs/diagnostics/internal/collectors/clickhouse/summary_test.go @@ -1,12 +1,13 @@ package clickhouse_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/clickhouse" - 
"github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/clickhouse" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/stretchr/testify/require" ) func TestSummaryConfiguration(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/system.go b/programs/diagnostics/internal/collectors/clickhouse/system.go similarity index 93% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/system.go rename to programs/diagnostics/internal/collectors/clickhouse/system.go index b370a3ab1df..d47cfd924f3 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/system.go +++ b/programs/diagnostics/internal/collectors/clickhouse/system.go @@ -2,11 +2,12 @@ package clickhouse import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" "github.com/pkg/errors" ) diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/system_test.go b/programs/diagnostics/internal/collectors/clickhouse/system_test.go similarity index 97% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/system_test.go rename to programs/diagnostics/internal/collectors/clickhouse/system_test.go index a11bbd75843..d1b9a6e7859 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/system_test.go +++ b/programs/diagnostics/internal/collectors/clickhouse/system_test.go @@ -1,13 +1,14 @@ package clickhouse_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/clickhouse" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/clickhouse" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/stretchr/testify/require" ) func TestSystemConfiguration(t *testing.T) { diff --git 
a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/zookeeper.go b/programs/diagnostics/internal/collectors/clickhouse/zookeeper.go similarity index 94% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/zookeeper.go rename to programs/diagnostics/internal/collectors/clickhouse/zookeeper.go index cd7cd8bfc6c..78aefeaa0c1 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/zookeeper.go +++ b/programs/diagnostics/internal/collectors/clickhouse/zookeeper.go @@ -2,14 +2,15 @@ package clickhouse import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + "strings" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/bmatcuk/doublestar/v4" "github.com/pkg/errors" "github.com/rs/zerolog/log" - "strings" ) // This collector collects the system zookeeper db diff --git a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/zookeeper_test.go b/programs/diagnostics/internal/collectors/clickhouse/zookeeper_test.go similarity index 92% rename from tools/clickhouse-diagnostics/internal/collectors/clickhouse/zookeeper_test.go rename to programs/diagnostics/internal/collectors/clickhouse/zookeeper_test.go index 8d53d044e76..3e56f6200f0 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/clickhouse/zookeeper_test.go +++ b/programs/diagnostics/internal/collectors/clickhouse/zookeeper_test.go @@ -1,13 +1,14 @@ package clickhouse_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/clickhouse" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/clickhouse" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/stretchr/testify/require" ) func TestZookeeperConfiguration(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/collectors/registry.go b/programs/diagnostics/internal/collectors/registry.go similarity index 93% rename from tools/clickhouse-diagnostics/internal/collectors/registry.go rename to programs/diagnostics/internal/collectors/registry.go index 29eb1f5939c..5611f947466 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/registry.go +++ b/programs/diagnostics/internal/collectors/registry.go @@ -2,8 +2,9 @@ package collectors import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + + 
"github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/pkg/errors" "github.com/rs/zerolog/log" ) diff --git a/tools/clickhouse-diagnostics/internal/collectors/registry_test.go b/programs/diagnostics/internal/collectors/registry_test.go similarity index 84% rename from tools/clickhouse-diagnostics/internal/collectors/registry_test.go rename to programs/diagnostics/internal/collectors/registry_test.go index 9f920f4b5c6..eccc5f2265d 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/registry_test.go +++ b/programs/diagnostics/internal/collectors/registry_test.go @@ -1,12 +1,13 @@ package collectors_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/clickhouse" - _ "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/system" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/clickhouse" + _ "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/system" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/stretchr/testify/require" ) func TestGetCollectorNames(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/collectors/system/command.go b/programs/diagnostics/internal/collectors/system/command.go similarity index 87% rename from tools/clickhouse-diagnostics/internal/collectors/system/command.go rename to programs/diagnostics/internal/collectors/system/command.go index 95a958ec6ce..ba4dd1e996c 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/system/command.go +++ b/programs/diagnostics/internal/collectors/system/command.go @@ -2,13 +2,14 @@ package system import ( "bytes" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + "os/exec" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/google/shlex" "github.com/pkg/errors" - "os/exec" ) // This collector runs a user specified command and collects it to a file diff --git a/tools/clickhouse-diagnostics/internal/collectors/system/command_test.go b/programs/diagnostics/internal/collectors/system/command_test.go similarity index 91% rename from tools/clickhouse-diagnostics/internal/collectors/system/command_test.go rename to programs/diagnostics/internal/collectors/system/command_test.go index e3d04cebe21..7de00cdabf4 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/system/command_test.go +++ b/programs/diagnostics/internal/collectors/system/command_test.go @@ -2,12 +2,13 @@ package system_test import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/system" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - 
"github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/system" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/stretchr/testify/require" ) func TestCommandConfiguration(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/collectors/system/file.go b/programs/diagnostics/internal/collectors/system/file.go similarity index 88% rename from tools/clickhouse-diagnostics/internal/collectors/system/file.go rename to programs/diagnostics/internal/collectors/system/file.go index 68e96314d1d..cda91636c52 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/system/file.go +++ b/programs/diagnostics/internal/collectors/system/file.go @@ -1,14 +1,15 @@ package system import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + "os" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/pkg/errors" "github.com/rs/zerolog/log" "github.com/yargevad/filepathx" - "os" ) // This collector collects arbitrary user files diff --git a/tools/clickhouse-diagnostics/internal/collectors/system/file_test.go b/programs/diagnostics/internal/collectors/system/file_test.go similarity index 91% rename from tools/clickhouse-diagnostics/internal/collectors/system/file_test.go rename to programs/diagnostics/internal/collectors/system/file_test.go index f52c190d087..5b1d5b3a92f 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/system/file_test.go +++ b/programs/diagnostics/internal/collectors/system/file_test.go @@ -1,12 +1,13 @@ package system_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/system" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/system" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/stretchr/testify/require" ) func TestFileConfiguration(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/collectors/system/system.go b/programs/diagnostics/internal/collectors/system/system.go similarity index 95% rename from tools/clickhouse-diagnostics/internal/collectors/system/system.go rename to programs/diagnostics/internal/collectors/system/system.go index b2aaee976ba..69d16f36b8b 100644 --- 
a/tools/clickhouse-diagnostics/internal/collectors/system/system.go +++ b/programs/diagnostics/internal/collectors/system/system.go @@ -1,15 +1,16 @@ package system import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + "strings" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/elastic/gosigar" "github.com/jaypipes/ghw" "github.com/matishsiao/goInfo" "github.com/pkg/errors" - "strings" ) // This collector collects the system overview @@ -25,7 +26,7 @@ func NewSystemCollector(m *platform.ResourceManager) *SystemCollector { } func (sc *SystemCollector) Collect(conf config.Configuration) (*data.DiagnosticBundle, error) { - conf, err := conf.ValidateConfig(sc.Configuration()) + _, err := conf.ValidateConfig(sc.Configuration()) if err != nil { return &data.DiagnosticBundle{}, err } diff --git a/tools/clickhouse-diagnostics/internal/collectors/system/system_test.go b/programs/diagnostics/internal/collectors/system/system_test.go similarity index 91% rename from tools/clickhouse-diagnostics/internal/collectors/system/system_test.go rename to programs/diagnostics/internal/collectors/system/system_test.go index 35777f6a298..fb1e16bd1ed 100644 --- a/tools/clickhouse-diagnostics/internal/collectors/system/system_test.go +++ b/programs/diagnostics/internal/collectors/system/system_test.go @@ -1,12 +1,13 @@ package system_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/system" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/system" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/stretchr/testify/require" ) func TestSystemConfiguration(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/outputs/file/simple.go b/programs/diagnostics/internal/outputs/file/simple.go similarity index 96% rename from tools/clickhouse-diagnostics/internal/outputs/file/simple.go rename to programs/diagnostics/internal/outputs/file/simple.go index d19c1222fe1..63847b3addd 100644 --- a/tools/clickhouse-diagnostics/internal/outputs/file/simple.go +++ b/programs/diagnostics/internal/outputs/file/simple.go @@ -4,18 +4,19 @@ import ( "context" "encoding/csv" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - "github.com/mholt/archiver/v4" - "github.com/pkg/errors" - "github.com/rs/zerolog/log" "os" "path" "path/filepath" "strconv" "strings" + + 
"github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/mholt/archiver/v4" + "github.com/pkg/errors" + "github.com/rs/zerolog/log" ) const OutputName = "simple" @@ -148,7 +149,7 @@ func writeDatabaseFrame(frameId string, frame data.Frame, baseDir string) ([]str errs = append(errs, errors.Wrapf(err, "unable to write columns for frame %s", frameId)) return []string{}, errs } - // we don't collect an error for every line here like configs and logs - could mean alot of unnecessary noise + // we don't collect an error for every line here like configs and logs - could mean a lot of unnecessary noise for { values, ok, err := frame.Next() if err != nil { diff --git a/tools/clickhouse-diagnostics/internal/outputs/file/simple_test.go b/programs/diagnostics/internal/outputs/file/simple_test.go similarity index 98% rename from tools/clickhouse-diagnostics/internal/outputs/file/simple_test.go rename to programs/diagnostics/internal/outputs/file/simple_test.go index dfa9bc6d80a..471a1c70cc1 100644 --- a/tools/clickhouse-diagnostics/internal/outputs/file/simple_test.go +++ b/programs/diagnostics/internal/outputs/file/simple_test.go @@ -4,16 +4,17 @@ import ( "bufio" "encoding/xml" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs/file" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/stretchr/testify/require" "io" "os" "path" "strings" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs/file" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/stretchr/testify/require" ) var clusterFrame = test.NewFakeDataFrame("clusters", []string{"cluster", "shard_num", "shard_weight", "replica_num", "host_name", "host_address", "port", "is_local", "user", "default_database", "errors_count", "slowdowns_count", "estimated_recovery_time"}, diff --git a/tools/clickhouse-diagnostics/internal/outputs/registry.go b/programs/diagnostics/internal/outputs/registry.go similarity index 91% rename from tools/clickhouse-diagnostics/internal/outputs/registry.go rename to programs/diagnostics/internal/outputs/registry.go index 8782ecfda4f..0187cd9105d 100644 --- a/tools/clickhouse-diagnostics/internal/outputs/registry.go +++ b/programs/diagnostics/internal/outputs/registry.go @@ -2,8 +2,9 @@ package outputs import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/pkg/errors" "github.com/rs/zerolog/log" ) diff --git a/tools/clickhouse-diagnostics/internal/outputs/registry_test.go b/programs/diagnostics/internal/outputs/registry_test.go similarity index 81% rename from tools/clickhouse-diagnostics/internal/outputs/registry_test.go rename to 
programs/diagnostics/internal/outputs/registry_test.go index 0b0fe3597af..ba8408e5a59 100644 --- a/tools/clickhouse-diagnostics/internal/outputs/registry_test.go +++ b/programs/diagnostics/internal/outputs/registry_test.go @@ -1,11 +1,12 @@ package outputs_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs" - "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs/file" - _ "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs/terminal" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs/file" + _ "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs/terminal" + "github.com/stretchr/testify/require" ) func TestGetOutputNames(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/outputs/terminal/report.go b/programs/diagnostics/internal/outputs/terminal/report.go similarity index 96% rename from tools/clickhouse-diagnostics/internal/outputs/terminal/report.go rename to programs/diagnostics/internal/outputs/terminal/report.go index a601e10687a..8337f542457 100644 --- a/tools/clickhouse-diagnostics/internal/outputs/terminal/report.go +++ b/programs/diagnostics/internal/outputs/terminal/report.go @@ -3,12 +3,13 @@ package terminal import ( "bufio" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + "os" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/olekukonko/tablewriter" "github.com/pkg/errors" - "os" ) const OutputName = "report" diff --git a/tools/clickhouse-diagnostics/internal/platform/config/models.go b/programs/diagnostics/internal/platform/config/models.go similarity index 97% rename from tools/clickhouse-diagnostics/internal/platform/config/models.go rename to programs/diagnostics/internal/platform/config/models.go index 52b2489886d..6c76b8f149b 100644 --- a/tools/clickhouse-diagnostics/internal/platform/config/models.go +++ b/programs/diagnostics/internal/platform/config/models.go @@ -2,8 +2,9 @@ package config import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" "strings" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" ) type ConfigParam interface { diff --git a/tools/clickhouse-diagnostics/internal/platform/config/models_test.go b/programs/diagnostics/internal/platform/config/models_test.go similarity index 98% rename from tools/clickhouse-diagnostics/internal/platform/config/models_test.go rename to programs/diagnostics/internal/platform/config/models_test.go index b87e66408ef..916d20ec28b 100644 --- a/tools/clickhouse-diagnostics/internal/platform/config/models_test.go +++ b/programs/diagnostics/internal/platform/config/models_test.go @@ -1,9 +1,10 @@ package config_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/stretchr/testify/require" ) var conf = config.Configuration{ diff --git 
a/tools/clickhouse-diagnostics/internal/platform/config/utils.go b/programs/diagnostics/internal/platform/config/utils.go similarity index 95% rename from tools/clickhouse-diagnostics/internal/platform/config/utils.go rename to programs/diagnostics/internal/platform/config/utils.go index 127ff95570e..5f84c38d4f4 100644 --- a/tools/clickhouse-diagnostics/internal/platform/config/utils.go +++ b/programs/diagnostics/internal/platform/config/utils.go @@ -2,7 +2,8 @@ package config import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" ) func ReadStringListValues(conf Configuration, paramName string) ([]string, error) { diff --git a/tools/clickhouse-diagnostics/internal/platform/config/utils_test.go b/programs/diagnostics/internal/platform/config/utils_test.go similarity index 97% rename from tools/clickhouse-diagnostics/internal/platform/config/utils_test.go rename to programs/diagnostics/internal/platform/config/utils_test.go index 0f9791eb60e..9e03e5e69d2 100644 --- a/tools/clickhouse-diagnostics/internal/platform/config/utils_test.go +++ b/programs/diagnostics/internal/platform/config/utils_test.go @@ -1,9 +1,10 @@ package config_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/stretchr/testify/require" ) func TestReadStringListValues(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/platform/data/bundle.go b/programs/diagnostics/internal/platform/data/bundle.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/data/bundle.go rename to programs/diagnostics/internal/platform/data/bundle.go diff --git a/tools/clickhouse-diagnostics/internal/platform/data/bundle_test.go b/programs/diagnostics/internal/platform/data/bundle_test.go similarity index 84% rename from tools/clickhouse-diagnostics/internal/platform/data/bundle_test.go rename to programs/diagnostics/internal/platform/data/bundle_test.go index 5438a50ae0a..ff9cfc2cf56 100644 --- a/tools/clickhouse-diagnostics/internal/platform/data/bundle_test.go +++ b/programs/diagnostics/internal/platform/data/bundle_test.go @@ -1,10 +1,11 @@ package data_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/pkg/errors" "github.com/stretchr/testify/require" - "testing" ) func TestBundleError(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/platform/data/database.go b/programs/diagnostics/internal/platform/data/database.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/data/database.go rename to programs/diagnostics/internal/platform/data/database.go diff --git a/tools/clickhouse-diagnostics/internal/platform/data/database_test.go b/programs/diagnostics/internal/platform/data/database_test.go similarity index 96% rename from tools/clickhouse-diagnostics/internal/platform/data/database_test.go rename to programs/diagnostics/internal/platform/data/database_test.go index 7491f016a67..57d89e78efc 100644 --- a/tools/clickhouse-diagnostics/internal/platform/data/database_test.go +++ b/programs/diagnostics/internal/platform/data/database_test.go @@ -2,10 +2,11 @@ package data_test import ( "database/sql" - 
"github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/DATA-DOG/go-sqlmock" "github.com/stretchr/testify/require" - "testing" ) func TestString(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/platform/data/field.go b/programs/diagnostics/internal/platform/data/field.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/data/field.go rename to programs/diagnostics/internal/platform/data/field.go diff --git a/tools/clickhouse-diagnostics/internal/platform/data/file.go b/programs/diagnostics/internal/platform/data/file.go similarity index 98% rename from tools/clickhouse-diagnostics/internal/platform/data/file.go rename to programs/diagnostics/internal/platform/data/file.go index 8bdde8a6437..9760b4b6906 100644 --- a/tools/clickhouse-diagnostics/internal/platform/data/file.go +++ b/programs/diagnostics/internal/platform/data/file.go @@ -3,14 +3,15 @@ package data import ( "bufio" "encoding/xml" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - "github.com/pkg/errors" - "gopkg.in/yaml.v3" "io/ioutil" "os" "path" "path/filepath" "regexp" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/pkg/errors" + "gopkg.in/yaml.v3" ) type File interface { @@ -302,7 +303,7 @@ func (x XmlConfigFile) FindLogPaths() ([]string, error) { func (x XmlConfigFile) FindIncludedConfig() (ConfigFile, error) { if x.Included { - //cant recurse + //can't recurse return XmlConfigFile{}, nil } config, err := x.UnmarshallConfig() @@ -384,7 +385,7 @@ func (y YamlConfigFile) FindLogPaths() ([]string, error) { func (y YamlConfigFile) FindIncludedConfig() (ConfigFile, error) { if y.Included { - //cant recurse + //can't recurse return YamlConfigFile{}, nil } inputFile, err := ioutil.ReadFile(y.Path) diff --git a/tools/clickhouse-diagnostics/internal/platform/data/file_test.go b/programs/diagnostics/internal/platform/data/file_test.go similarity index 99% rename from tools/clickhouse-diagnostics/internal/platform/data/file_test.go rename to programs/diagnostics/internal/platform/data/file_test.go index 49f0bb9cf72..d273987d327 100644 --- a/tools/clickhouse-diagnostics/internal/platform/data/file_test.go +++ b/programs/diagnostics/internal/platform/data/file_test.go @@ -2,14 +2,15 @@ package data_test import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/stretchr/testify/require" "io/ioutil" "os" "path" "path/filepath" "strings" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/stretchr/testify/require" ) func TestNextFileDirectoryFrame(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/platform/data/frame.go b/programs/diagnostics/internal/platform/data/frame.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/data/frame.go rename to programs/diagnostics/internal/platform/data/frame.go diff --git a/tools/clickhouse-diagnostics/internal/platform/data/memory.go b/programs/diagnostics/internal/platform/data/memory.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/data/memory.go rename to programs/diagnostics/internal/platform/data/memory.go diff --git a/tools/clickhouse-diagnostics/internal/platform/data/memory_test.go b/programs/diagnostics/internal/platform/data/memory_test.go similarity index 94% rename from 
tools/clickhouse-diagnostics/internal/platform/data/memory_test.go rename to programs/diagnostics/internal/platform/data/memory_test.go index 3fc2f6822ef..fcc02e37d32 100644 --- a/tools/clickhouse-diagnostics/internal/platform/data/memory_test.go +++ b/programs/diagnostics/internal/platform/data/memory_test.go @@ -1,9 +1,10 @@ package data_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/stretchr/testify/require" ) func TestNextMemoryFrame(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/platform/data/misc.go b/programs/diagnostics/internal/platform/data/misc.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/data/misc.go rename to programs/diagnostics/internal/platform/data/misc.go diff --git a/tools/clickhouse-diagnostics/internal/platform/database/native.go b/programs/diagnostics/internal/platform/database/native.go similarity index 96% rename from tools/clickhouse-diagnostics/internal/platform/database/native.go rename to programs/diagnostics/internal/platform/database/native.go index f167fb5540c..e512a634fbf 100644 --- a/tools/clickhouse-diagnostics/internal/platform/database/native.go +++ b/programs/diagnostics/internal/platform/database/native.go @@ -3,10 +3,11 @@ package database import ( "database/sql" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + "strings" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" _ "github.com/ClickHouse/clickhouse-go/v2" "github.com/pkg/errors" - "strings" ) type ClickhouseNativeClient struct { diff --git a/tools/clickhouse-diagnostics/internal/platform/database/native_test.go b/programs/diagnostics/internal/platform/database/native_test.go similarity index 96% rename from tools/clickhouse-diagnostics/internal/platform/database/native_test.go rename to programs/diagnostics/internal/platform/database/native_test.go index 1e936fe2449..4814310f182 100644 --- a/tools/clickhouse-diagnostics/internal/platform/database/native_test.go +++ b/programs/diagnostics/internal/platform/database/native_test.go @@ -1,18 +1,21 @@ +//go:build !no_docker + package database_test import ( "context" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/database" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/stretchr/testify/require" - "github.com/testcontainers/testcontainers-go" - "github.com/testcontainers/testcontainers-go/wait" "os" "path" "strconv" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/database" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/stretchr/testify/require" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/wait" ) func TestMain(m *testing.M) { diff --git a/tools/clickhouse-diagnostics/internal/platform/manager.go b/programs/diagnostics/internal/platform/manager.go similarity index 86% rename from tools/clickhouse-diagnostics/internal/platform/manager.go rename to programs/diagnostics/internal/platform/manager.go index e23a534a6fc..b4435b62ea2 100644 --- a/tools/clickhouse-diagnostics/internal/platform/manager.go +++ 
b/programs/diagnostics/internal/platform/manager.go @@ -2,9 +2,10 @@ package platform import ( "errors" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/database" "sync" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/database" ) var once sync.Once diff --git a/tools/clickhouse-diagnostics/internal/platform/manager_test.go b/programs/diagnostics/internal/platform/manager_test.go similarity index 92% rename from tools/clickhouse-diagnostics/internal/platform/manager_test.go rename to programs/diagnostics/internal/platform/manager_test.go index 09316052b53..49efee49ce3 100644 --- a/tools/clickhouse-diagnostics/internal/platform/manager_test.go +++ b/programs/diagnostics/internal/platform/manager_test.go @@ -1,17 +1,20 @@ +//go:build !no_docker + package platform_test import ( "context" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/stretchr/testify/require" - "github.com/testcontainers/testcontainers-go" - "github.com/testcontainers/testcontainers-go/wait" "os" "path" "strconv" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/stretchr/testify/require" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/wait" ) func TestMain(m *testing.M) { diff --git a/tools/clickhouse-diagnostics/internal/platform/test/data.go b/programs/diagnostics/internal/platform/test/data.go similarity index 96% rename from tools/clickhouse-diagnostics/internal/platform/test/data.go rename to programs/diagnostics/internal/platform/test/data.go index 44ce7c70908..7710e9a69a1 100644 --- a/tools/clickhouse-diagnostics/internal/platform/test/data.go +++ b/programs/diagnostics/internal/platform/test/data.go @@ -2,11 +2,12 @@ package test import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - "github.com/pkg/errors" "sort" "strings" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/pkg/errors" ) type fakeClickhouseClient struct { diff --git a/tools/clickhouse-diagnostics/internal/platform/test/env.go b/programs/diagnostics/internal/platform/test/env.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/test/env.go rename to programs/diagnostics/internal/platform/test/env.go diff --git a/tools/clickhouse-diagnostics/internal/platform/utils/file.go b/programs/diagnostics/internal/platform/utils/file.go similarity index 96% rename from tools/clickhouse-diagnostics/internal/platform/utils/file.go rename to programs/diagnostics/internal/platform/utils/file.go index 608e45be74a..71af4b32658 100644 --- a/tools/clickhouse-diagnostics/internal/platform/utils/file.go +++ b/programs/diagnostics/internal/platform/utils/file.go @@ -2,11 +2,12 @@ package utils import ( "fmt" - "github.com/pkg/errors" "io" "io/fs" "os" "path/filepath" + + "github.com/pkg/errors" ) func FileExists(name string) (bool, error) { @@ -64,7 +65,7 @@ func CopyFile(sourceFilename string, destFilename string) error { return 
err } -// patterns passed are an OR - any can be satisified and the file will be listed +// patterns passed are an OR - any can be satisfied and the file will be listed func ListFilesInDirectory(directory string, patterns []string) ([]string, []error) { var files []string diff --git a/tools/clickhouse-diagnostics/internal/platform/utils/file_test.go b/programs/diagnostics/internal/platform/utils/file_test.go similarity index 97% rename from tools/clickhouse-diagnostics/internal/platform/utils/file_test.go rename to programs/diagnostics/internal/platform/utils/file_test.go index 51c8ed2e9c5..8d0430090c9 100644 --- a/tools/clickhouse-diagnostics/internal/platform/utils/file_test.go +++ b/programs/diagnostics/internal/platform/utils/file_test.go @@ -2,11 +2,12 @@ package utils_test import ( "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - "github.com/stretchr/testify/require" "os" "path" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/stretchr/testify/require" ) func TestFileExists(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/platform/utils/process.go b/programs/diagnostics/internal/platform/utils/process.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/utils/process.go rename to programs/diagnostics/internal/platform/utils/process.go diff --git a/tools/clickhouse-diagnostics/internal/platform/utils/process_test.go b/programs/diagnostics/internal/platform/utils/process_test.go similarity index 92% rename from tools/clickhouse-diagnostics/internal/platform/utils/process_test.go rename to programs/diagnostics/internal/platform/utils/process_test.go index 45bbc18bdef..ed54d16cc72 100644 --- a/tools/clickhouse-diagnostics/internal/platform/utils/process_test.go +++ b/programs/diagnostics/internal/platform/utils/process_test.go @@ -1,16 +1,19 @@ +//go:build !no_docker + package utils_test import ( "context" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - "github.com/stretchr/testify/require" - "github.com/testcontainers/testcontainers-go" - "github.com/testcontainers/testcontainers-go/wait" "os" "path" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/stretchr/testify/require" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/wait" ) func TestMain(m *testing.M) { diff --git a/tools/clickhouse-diagnostics/internal/platform/utils/slices.go b/programs/diagnostics/internal/platform/utils/slices.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/utils/slices.go rename to programs/diagnostics/internal/platform/utils/slices.go diff --git a/tools/clickhouse-diagnostics/internal/platform/utils/slices_test.go b/programs/diagnostics/internal/platform/utils/slices_test.go similarity index 95% rename from tools/clickhouse-diagnostics/internal/platform/utils/slices_test.go rename to programs/diagnostics/internal/platform/utils/slices_test.go index 18965415013..ea5c1c81dcc 100644 --- a/tools/clickhouse-diagnostics/internal/platform/utils/slices_test.go +++ b/programs/diagnostics/internal/platform/utils/slices_test.go @@ -1,9 +1,10 @@ package utils_test import ( - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - 
"github.com/stretchr/testify/require" "testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/stretchr/testify/require" ) func TestIntersection(t *testing.T) { diff --git a/tools/clickhouse-diagnostics/internal/platform/utils/time.go b/programs/diagnostics/internal/platform/utils/time.go similarity index 100% rename from tools/clickhouse-diagnostics/internal/platform/utils/time.go rename to programs/diagnostics/internal/platform/utils/time.go diff --git a/tools/clickhouse-diagnostics/internal/runner.go b/programs/diagnostics/internal/runner.go similarity index 88% rename from tools/clickhouse-diagnostics/internal/runner.go rename to programs/diagnostics/internal/runner.go index d2147cd1c65..9386a1d178b 100644 --- a/tools/clickhouse-diagnostics/internal/runner.go +++ b/programs/diagnostics/internal/runner.go @@ -1,11 +1,11 @@ package internal import ( - c "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - o "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/data" + c "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + o "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/data" "github.com/pkg/errors" "github.com/rs/zerolog/log" ) @@ -96,7 +96,7 @@ func output(config *runConfiguration, bundles map[string]*data.DiagnosticBundle) return err } frameErrors, err := output.Write(config.id, bundles, config.outputConfig) - // we report over failing hard on frame errors - upto the output to determine what is fatal via error + // we report over failing hard on frame errors - up to the output to determine what is fatal via error for _, fError := range frameErrors.Errors { log.Warn().Msgf("failure to write frame in output %s - %s", config.output, fError) } diff --git a/tools/clickhouse-diagnostics/internal/runner_test.go b/programs/diagnostics/internal/runner_test.go similarity index 84% rename from tools/clickhouse-diagnostics/internal/runner_test.go rename to programs/diagnostics/internal/runner_test.go index a6e7dbe4cbc..8cf29a140ec 100644 --- a/tools/clickhouse-diagnostics/internal/runner_test.go +++ b/programs/diagnostics/internal/runner_test.go @@ -1,25 +1,28 @@ +//go:build !no_docker + package internal_test import ( "context" "fmt" - "github.com/ClickHouse/clickhouse-diagnostics/internal" - "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors" - _ "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/clickhouse" - _ "github.com/ClickHouse/clickhouse-diagnostics/internal/collectors/system" - "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs" - _ "github.com/ClickHouse/clickhouse-diagnostics/internal/outputs/file" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/config" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/test" - "github.com/ClickHouse/clickhouse-diagnostics/internal/platform/utils" - "github.com/stretchr/testify/require" - "github.com/testcontainers/testcontainers-go" - "github.com/testcontainers/testcontainers-go/wait" "io/ioutil" "os" "path" "strconv" 
"testing" + + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors" + _ "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/clickhouse" + _ "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/collectors/system" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs" + _ "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/outputs/file" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/config" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/test" + "github.com/ClickHouse/ClickHouse/programs/diagnostics/internal/platform/utils" + "github.com/stretchr/testify/require" + "github.com/testcontainers/testcontainers-go" + "github.com/testcontainers/testcontainers-go/wait" ) func TestMain(m *testing.M) { diff --git a/tools/clickhouse-diagnostics/testdata/configs/include/xml/server-include.xml b/programs/diagnostics/testdata/configs/include/xml/server-include.xml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/include/xml/server-include.xml rename to programs/diagnostics/testdata/configs/include/xml/server-include.xml diff --git a/tools/clickhouse-diagnostics/testdata/configs/include/xml/user-include.xml b/programs/diagnostics/testdata/configs/include/xml/user-include.xml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/include/xml/user-include.xml rename to programs/diagnostics/testdata/configs/include/xml/user-include.xml diff --git a/tools/clickhouse-diagnostics/testdata/configs/include/yaml/server-include.yaml b/programs/diagnostics/testdata/configs/include/yaml/server-include.yaml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/include/yaml/server-include.yaml rename to programs/diagnostics/testdata/configs/include/yaml/server-include.yaml diff --git a/tools/clickhouse-diagnostics/testdata/configs/include/yaml/user-include.yaml b/programs/diagnostics/testdata/configs/include/yaml/user-include.yaml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/include/yaml/user-include.yaml rename to programs/diagnostics/testdata/configs/include/yaml/user-include.yaml diff --git a/tools/clickhouse-diagnostics/testdata/configs/xml/config.xml b/programs/diagnostics/testdata/configs/xml/config.xml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/xml/config.xml rename to programs/diagnostics/testdata/configs/xml/config.xml diff --git a/tools/clickhouse-diagnostics/testdata/configs/xml/users.d/default-password.xml b/programs/diagnostics/testdata/configs/xml/users.d/default-password.xml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/xml/users.d/default-password.xml rename to programs/diagnostics/testdata/configs/xml/users.d/default-password.xml diff --git a/tools/clickhouse-diagnostics/testdata/configs/xml/users.xml b/programs/diagnostics/testdata/configs/xml/users.xml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/xml/users.xml rename to programs/diagnostics/testdata/configs/xml/users.xml diff --git a/tools/clickhouse-diagnostics/testdata/configs/yaml/config.yaml b/programs/diagnostics/testdata/configs/yaml/config.yaml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/yaml/config.yaml rename to programs/diagnostics/testdata/configs/yaml/config.yaml diff --git 
a/tools/clickhouse-diagnostics/testdata/configs/yaml/users.d/default-password.yaml b/programs/diagnostics/testdata/configs/yaml/users.d/default-password.yaml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/yaml/users.d/default-password.yaml rename to programs/diagnostics/testdata/configs/yaml/users.d/default-password.yaml diff --git a/tools/clickhouse-diagnostics/testdata/configs/yaml/users.yaml b/programs/diagnostics/testdata/configs/yaml/users.yaml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/yaml/users.yaml rename to programs/diagnostics/testdata/configs/yaml/users.yaml diff --git a/tools/clickhouse-diagnostics/testdata/configs/yandex_xml/config.xml b/programs/diagnostics/testdata/configs/yandex_xml/config.xml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/configs/yandex_xml/config.xml rename to programs/diagnostics/testdata/configs/yandex_xml/config.xml diff --git a/tools/clickhouse-diagnostics/testdata/docker/admin.xml b/programs/diagnostics/testdata/docker/admin.xml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/docker/admin.xml rename to programs/diagnostics/testdata/docker/admin.xml diff --git a/tools/clickhouse-diagnostics/testdata/docker/custom.xml b/programs/diagnostics/testdata/docker/custom.xml similarity index 100% rename from tools/clickhouse-diagnostics/testdata/docker/custom.xml rename to programs/diagnostics/testdata/docker/custom.xml diff --git a/tools/clickhouse-diagnostics/testdata/logs/var/logs/clickhouse-server.err.log b/programs/diagnostics/testdata/logs/var/logs/clickhouse-server.err.log similarity index 100% rename from tools/clickhouse-diagnostics/testdata/logs/var/logs/clickhouse-server.err.log rename to programs/diagnostics/testdata/logs/var/logs/clickhouse-server.err.log diff --git a/tools/clickhouse-diagnostics/testdata/logs/var/logs/clickhouse-server.log b/programs/diagnostics/testdata/logs/var/logs/clickhouse-server.log similarity index 100% rename from tools/clickhouse-diagnostics/testdata/logs/var/logs/clickhouse-server.log rename to programs/diagnostics/testdata/logs/var/logs/clickhouse-server.log diff --git a/tools/clickhouse-diagnostics/testdata/logs/var/logs/clickhouse-server.log.gz b/programs/diagnostics/testdata/logs/var/logs/clickhouse-server.log.gz similarity index 100% rename from tools/clickhouse-diagnostics/testdata/logs/var/logs/clickhouse-server.log.gz rename to programs/diagnostics/testdata/logs/var/logs/clickhouse-server.log.gz diff --git a/programs/su/clickhouse-su.cpp b/programs/su/clickhouse-su.cpp index 9aa41085094..0979abf353d 100644 --- a/programs/su/clickhouse-su.cpp +++ b/programs/su/clickhouse-su.cpp @@ -59,7 +59,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) throwFromErrno(fmt::format("Cannot do 'getgrnam_r' to obtain gid from group name ({})", arg_gid), ErrorCodes::SYSTEM_ERROR); if (!result) - throw Exception("Group {} is not found in the system", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Group {} is not found in the system", arg_gid); gid = entry.gr_gid; } @@ -84,7 +84,7 @@ void setUserAndGroup(std::string arg_uid, std::string arg_gid) throwFromErrno(fmt::format("Cannot do 'getpwnam_r' to obtain uid from user name ({})", arg_uid), ErrorCodes::SYSTEM_ERROR); if (!result) - throw Exception("User {} is not found in the system", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "User {} is not found in the system", arg_uid); uid = 
entry.pw_uid; } diff --git a/src/Access/AccessBackup.cpp b/src/Access/AccessBackup.cpp index 0322ad7457b..bd1344a6f14 100644 --- a/src/Access/AccessBackup.cpp +++ b/src/Access/AccessBackup.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ namespace DB namespace ErrorCodes { + extern const int CANNOT_RESTORE_TABLE; extern const int LOGICAL_ERROR; } @@ -139,7 +141,7 @@ namespace } catch (Exception & e) { - e.addMessage("While parsing " + file_path); + e.addMessage("While parsing " + file_path + " from backup"); throw; } } @@ -225,7 +227,7 @@ namespace } } - AccessRightsElements getRequiredAccessToRestore(const std::unordered_map<UUID, AccessEntityPtr> & entities) + AccessRightsElements getRequiredAccessToRestore(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities) { AccessRightsElements res; for (const auto & entity : entities | boost::adaptors::map_values) @@ -294,65 +296,78 @@ namespace } } -void backupAccessEntities( - BackupEntriesCollector & backup_entries_collector, + +std::pair<String, BackupEntryPtr> makeBackupEntryForAccess( + const std::vector<std::pair<UUID, AccessEntityPtr>> access_entities, const String & data_path_in_backup, - const AccessControl & access_control, - AccessEntityType type) + size_t counter, + const AccessControl & access_control) { - auto entities = access_control.readAllForBackup(type, backup_entries_collector.getBackupSettings()); - auto dependencies = readDependenciesNamesAndTypes(findDependencies(entities), access_control); + auto dependencies = readDependenciesNamesAndTypes(findDependencies(access_entities), access_control); AccessEntitiesInBackup ab; - boost::range::copy(entities, std::inserter(ab.entities, ab.entities.end())); + boost::range::copy(access_entities, std::inserter(ab.entities, ab.entities.end())); ab.dependencies = std::move(dependencies); - backup_entries_collector.addBackupEntry(fs::path{data_path_in_backup} / "access.txt", ab.toBackupEntry()); + String filename = fmt::format("access{:02}.txt", counter + 1); /// access01.txt, access02.txt, ... 
+ String file_path_in_backup = fs::path{data_path_in_backup} / filename; + return {file_path_in_backup, ab.toBackupEntry()}; } -AccessRestoreTask::AccessRestoreTask( - const BackupPtr & backup_, const RestoreSettings & restore_settings_, std::shared_ptr<IRestoreCoordination> restore_coordination_) - : backup(backup_), restore_settings(restore_settings_), restore_coordination(restore_coordination_) +AccessRestorerFromBackup::AccessRestorerFromBackup( + const BackupPtr & backup_, const RestoreSettings & restore_settings_) + : backup(backup_), allow_unresolved_access_dependencies(restore_settings_.allow_unresolved_access_dependencies) { } -AccessRestoreTask::~AccessRestoreTask() = default; +AccessRestorerFromBackup::~AccessRestorerFromBackup() = default; -void AccessRestoreTask::addDataPath(const String & data_path) +void AccessRestorerFromBackup::addDataPath(const String & data_path, const QualifiedTableName & table_name_for_logs) { if (!data_paths.emplace(data_path).second) return; - String file_path = fs::path{data_path} / "access.txt"; - auto backup_entry = backup->readFile(file_path); - auto ab = AccessEntitiesInBackup::fromBackupEntry(*backup_entry, file_path); + fs::path data_path_in_backup_fs = data_path; + Strings filenames = backup->listFiles(data_path); + if (filenames.empty()) + return; + + for (const String & filename : filenames) + { + if (!filename.starts_with("access") || !filename.ends_with(".txt")) + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't match the wildcard \"access*.txt\"", + table_name_for_logs.getFullName(), String{data_path_in_backup_fs / filename}); + } + + ::sort(filenames.begin(), filenames.end()); + + for (const String & filename : filenames) + { + String filepath_in_backup = data_path_in_backup_fs / filename; + auto backup_entry = backup->readFile(filepath_in_backup); + auto ab = AccessEntitiesInBackup::fromBackupEntry(*backup_entry, filepath_in_backup); + + boost::range::copy(ab.entities, std::back_inserter(entities)); + boost::range::copy(ab.dependencies, std::inserter(dependencies, dependencies.end())); + } - boost::range::copy(ab.entities, std::inserter(entities, entities.end())); - boost::range::copy(ab.dependencies, std::inserter(dependencies, dependencies.end())); for (const auto & id : entities | boost::adaptors::map_keys) dependencies.erase(id); } -bool AccessRestoreTask::hasDataPath(const String & data_path) const -{ - return data_paths.contains(data_path); -} - -AccessRightsElements AccessRestoreTask::getRequiredAccess() const +AccessRightsElements AccessRestorerFromBackup::getRequiredAccess() const { return getRequiredAccessToRestore(entities); } -void AccessRestoreTask::restore(AccessControl & access_control) const +std::vector<std::pair<UUID, AccessEntityPtr>> AccessRestorerFromBackup::getAccessEntities(const AccessControl & access_control) const { - auto old_to_new_ids = resolveDependencies(dependencies, access_control, restore_settings.allow_unresolved_access_dependencies); + auto new_entities = entities; - std::vector<std::pair<UUID, AccessEntityPtr>> new_entities; - boost::range::copy(entities, std::back_inserter(new_entities)); + auto old_to_new_ids = resolveDependencies(dependencies, access_control, allow_unresolved_access_dependencies); generateRandomIDs(new_entities, old_to_new_ids); - replaceDependencies(new_entities, old_to_new_ids); - access_control.insertFromBackup(new_entities, restore_settings, restore_coordination); + return new_entities; } } diff --git a/src/Access/AccessBackup.h b/src/Access/AccessBackup.h index 5c70e268eae..74f889e2c00 100644 --- 
a/src/Access/AccessBackup.h +++ b/src/Access/AccessBackup.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -9,46 +9,45 @@ namespace DB { class AccessControl; enum class AccessEntityType; -class BackupEntriesCollector; -class RestorerFromBackup; -class IBackup; -using BackupPtr = std::shared_ptr<const IBackup>; -class IRestoreCoordination; struct IAccessEntity; using AccessEntityPtr = std::shared_ptr<const IAccessEntity>; class AccessRightsElements; +class IBackup; +using BackupPtr = std::shared_ptr<const IBackup>; +class IBackupEntry; +using BackupEntryPtr = std::shared_ptr<const IBackupEntry>; +struct RestoreSettings; +struct QualifiedTableName; /// Makes a backup of access entities of a specified type. -void backupAccessEntities( - BackupEntriesCollector & backup_entries_collector, +std::pair<String, BackupEntryPtr> makeBackupEntryForAccess( + const std::vector<std::pair<UUID, AccessEntityPtr>> access_entities, const String & data_path_in_backup, - const AccessControl & access_control, - AccessEntityType type); + size_t counter, + const AccessControl & access_control); + /// Restores access entities from a backup. -class AccessRestoreTask +class AccessRestorerFromBackup { public: - AccessRestoreTask( - const BackupPtr & backup_, const RestoreSettings & restore_settings_, std::shared_ptr<IRestoreCoordination> restore_coordination_); - ~AccessRestoreTask(); + AccessRestorerFromBackup(const BackupPtr & backup_, const RestoreSettings & restore_settings_); + ~AccessRestorerFromBackup(); /// Adds a data path to loads access entities from. - void addDataPath(const String & data_path); - bool hasDataPath(const String & data_path) const; + void addDataPath(const String & data_path, const QualifiedTableName & table_name_for_logs); /// Checks that the current user can do restoring. AccessRightsElements getRequiredAccess() const; /// Inserts all access entities loaded from all the paths added by addDataPath(). - void restore(AccessControl & access_control) const; + std::vector<std::pair<UUID, AccessEntityPtr>> getAccessEntities(const AccessControl & access_control) const; private: BackupPtr backup; - RestoreSettings restore_settings; - std::shared_ptr<IRestoreCoordination> restore_coordination; - std::unordered_map<UUID, AccessEntityPtr> entities; + bool allow_unresolved_access_dependencies = false; + std::vector<std::pair<UUID, AccessEntityPtr>> entities; std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies; std::unordered_set<String> data_paths; }; diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index aa58044a6b0..7152820b5bc 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -459,20 +459,9 @@ UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Ne } } -void AccessControl::backup(BackupEntriesCollector & backup_entries_collector, AccessEntityType type, const String & data_path_in_backup) const +void AccessControl::restoreFromBackup(RestorerFromBackup & restorer) { - backupAccessEntities(backup_entries_collector, data_path_in_backup, *this, type); -} - -void AccessControl::restore(RestorerFromBackup & restorer, const String & data_path_in_backup) -{ - /// The restorer must already know about `data_path_in_backup`, but let's check. 
- restorer.checkPathInBackupToRestoreAccess(data_path_in_backup); -} - -void AccessControl::insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) -{ - MultipleAccessStorage::insertFromBackup(entities_from_backup, restore_settings, restore_coordination); + MultipleAccessStorage::restoreFromBackup(restorer); changes_notifier->sendNotifications(); } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 90ad2895122..22ff0a488f7 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -42,8 +42,6 @@ class ClientInfo; class ExternalAuthenticators; class AccessChangesNotifier; struct Settings; -class BackupEntriesCollector; -class RestorerFromBackup; /// Manages access control entities. @@ -121,8 +119,7 @@ public: UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const; /// Makes a backup of access entities. - void backup(BackupEntriesCollector & backup_entries_collector, AccessEntityType type, const String & data_path_in_backup) const; - static void restore(RestorerFromBackup & restorer, const String & data_path_in_backup); + void restoreFromBackup(RestorerFromBackup & restorer) override; void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); @@ -198,8 +195,6 @@ public: /// Gets manager of notifications. AccessChangesNotifier & getChangesNotifier(); - void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; - private: class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 221113cb425..995a46d07ca 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -412,15 +412,18 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg return false; }; + if (is_full_access) + return access_granted(); + + if (user_was_dropped) + return access_denied("User has been dropped", ErrorCodes::UNKNOWN_USER); + if (flags & AccessType::CLUSTER && !access_control->doesOnClusterQueriesRequireClusterGrant()) flags &= ~AccessType::CLUSTER; - if (!flags || is_full_access) + if (!flags) return access_granted(); - if (!tryGetUser()) - return access_denied("User has been dropped", ErrorCodes::UNKNOWN_USER); - /// Access to temporary tables is controlled in an unusual way, not like normal tables. /// Creating of temporary tables is controlled by AccessType::CREATE_TEMPORARY_TABLES grant, /// and other grants are considered as always given. 
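Note on the reordering above: both checkAccessImplHelper() and (below) checkAdminOptionImplHelper() now decide on full access and on a dropped user before looking at the requested flags. A minimal standalone sketch of that ordering follows; the type and flag names are simplified stand-ins for illustration, not the real ContextAccess API.

#include <cstdint>
#include <iostream>

enum class CheckResult { Granted, DeniedUserDropped };

struct CheckContext
{
    bool is_full_access = false;         // e.g. an internal query running with full privileges
    bool user_was_dropped = false;       // the user object no longer exists in the access storage
    bool on_cluster_needs_grant = true;  // cf. AccessControl::doesOnClusterQueriesRequireClusterGrant()
};

constexpr uint64_t FLAG_CLUSTER = 0x1;   // stand-in for AccessType::CLUSTER

CheckResult checkAccessSketch(const CheckContext & ctx, uint64_t flags)
{
    if (ctx.is_full_access)              // 1. full access wins, even for a dropped user
        return CheckResult::Granted;
    if (ctx.user_was_dropped)            // 2. a dropped user is rejected before any flag handling
        return CheckResult::DeniedUserDropped;
    if ((flags & FLAG_CLUSTER) && !ctx.on_cluster_needs_grant)
        flags &= ~FLAG_CLUSTER;          // 3. ON CLUSTER may not need an explicit CLUSTER grant
    if (flags == 0)                      // 4. nothing left to check
        return CheckResult::Granted;
    return CheckResult::Granted;         // ... the real code continues with per-object checks
}

int main()
{
    CheckContext dropped_user;
    dropped_user.user_was_dropped = true;
    std::cout << (checkAccessSketch(dropped_user, FLAG_CLUSTER) == CheckResult::DeniedUserDropped) << '\n'; // prints 1
}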
@@ -600,9 +603,6 @@ void ContextAccess::checkGrantOption(const AccessRightsElements & elements) cons template bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const { - if (!std::size(role_ids) || is_full_access) - return true; - auto show_error = [this](const String & msg, int error_code [[maybe_unused]]) { UNUSED(this); @@ -610,12 +610,18 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const throw Exception(getUserName() + ": " + msg, error_code); }; - if (!tryGetUser()) + if (is_full_access) + return true; + + if (user_was_dropped) { show_error("User has been dropped", ErrorCodes::UNKNOWN_USER); return false; } + if (!std::size(role_ids)) + return true; + if (isGranted(AccessType::ROLE_ADMIN)) return true; diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 231e325196d..994abc7b53a 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -650,19 +651,24 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const } -void DiskAccessStorage::insertFromBackup( - const std::vector> & entities_from_backup, - const RestoreSettings & restore_settings, - std::shared_ptr) +void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) { if (!isRestoreAllowed()) throwRestoreNotAllowed(); - bool replace_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kReplace); - bool throw_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kCreate); + auto entities = restorer.getAccessEntitiesToRestore(); + if (entities.empty()) + return; - for (const auto & [id, entity] : entities_from_backup) - insertWithID(id, entity, replace_if_exists, throw_if_exists); + auto create_access = restorer.getRestoreSettings().create_access; + bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); + bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); + + restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] + { + for (const auto & [id, entity] : entities) + insertWithID(id, entity, replace_if_exists, throw_if_exists); + }); } } diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index 1bdefbf82f9..d3bd61ff353 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -30,7 +30,7 @@ public: bool exists(const UUID & id) const override; bool isBackupAllowed() const override { return backup_allowed; } - void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; + void restoreFromBackup(RestorerFromBackup & restorer) override; private: std::optional findImpl(AccessEntityType type, const String & name) const override; diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 476b1674ce1..fa9c78816c7 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -2,9 +2,12 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include @@ -520,26 +523,30 @@ bool IAccessStorage::isAddressAllowed(const User & user, const Poco::Net::IPAddr } -bool IAccessStorage::isRestoreAllowed() const -{ - return isBackupAllowed() && !isReadOnly(); -} - -std::vector> 
IAccessStorage::readAllForBackup(AccessEntityType type, const BackupSettings &) const +void IAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const { if (!isBackupAllowed()) throwBackupNotAllowed(); - auto res = readAllWithIDs(type); - boost::range::remove_erase_if(res, [](const std::pair & x) { return !x.second->isBackupAllowed(); }); - return res; + auto entities = readAllWithIDs(type); + boost::range::remove_erase_if(entities, [](const std::pair & x) { return !x.second->isBackupAllowed(); }); + + auto backup_entry = makeBackupEntryForAccess( + entities, + data_path_in_backup, + backup_entries_collector.getAccessCounter(type), + backup_entries_collector.getContext()->getAccessControl()); + + backup_entries_collector.addBackupEntry(backup_entry); } -void IAccessStorage::insertFromBackup(const std::vector> &, const RestoreSettings &, std::shared_ptr) + +void IAccessStorage::restoreFromBackup(RestorerFromBackup &) { if (!isRestoreAllowed()) throwRestoreNotAllowed(); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "insertFromBackup() is not implemented in {}", getStorageType()); + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "restoreFromBackup() is not implemented in {}", getStorageType()); } diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 7b43309204d..394d3ed6358 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -18,9 +18,8 @@ struct User; class Credentials; class ExternalAuthenticators; enum class AuthenticationType; -struct BackupSettings; -struct RestoreSettings; -class IRestoreCoordination; +class BackupEntriesCollector; +class RestorerFromBackup; /// Contains entities, i.e. instances of classes derived from IAccessEntity. /// The implementations of this class MUST be thread-safe. @@ -158,11 +157,11 @@ public: /// Returns true if this storage can be stored to or restored from a backup. virtual bool isBackupAllowed() const { return false; } - virtual bool isRestoreAllowed() const; + virtual bool isRestoreAllowed() const { return isBackupAllowed() && !isReadOnly(); } /// Makes a backup of this access storage. 
- virtual std::vector> readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const; - virtual void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination); + virtual void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const; + virtual void restoreFromBackup(RestorerFromBackup & restorer); protected: virtual std::optional findImpl(AccessEntityType type, const String & name) const = 0; diff --git a/src/Access/MemoryAccessStorage.cpp b/src/Access/MemoryAccessStorage.cpp index ad877e263ad..60669532e25 100644 --- a/src/Access/MemoryAccessStorage.cpp +++ b/src/Access/MemoryAccessStorage.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -272,19 +273,24 @@ void MemoryAccessStorage::setAll(const std::vector> & entities_from_backup, - const RestoreSettings & restore_settings, - std::shared_ptr) +void MemoryAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) { if (!isRestoreAllowed()) throwRestoreNotAllowed(); - bool replace_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kReplace); - bool throw_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kCreate); + auto entities = restorer.getAccessEntitiesToRestore(); + if (entities.empty()) + return; - for (const auto & [id, entity] : entities_from_backup) - insertWithID(id, entity, replace_if_exists, throw_if_exists); + auto create_access = restorer.getRestoreSettings().create_access; + bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); + bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); + + restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] + { + for (const auto & [id, entity] : entities) + insertWithID(id, entity, replace_if_exists, throw_if_exists); + }); } } diff --git a/src/Access/MemoryAccessStorage.h b/src/Access/MemoryAccessStorage.h index aa4cd08252c..5c8d33ed443 100644 --- a/src/Access/MemoryAccessStorage.h +++ b/src/Access/MemoryAccessStorage.h @@ -29,7 +29,7 @@ public: bool exists(const UUID & id) const override; bool isBackupAllowed() const override { return backup_allowed; } - void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; + void restoreFromBackup(RestorerFromBackup & restorer) override; private: std::optional findImpl(AccessEntityType type, const String & name) const override; diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index 6f654f68e57..e7151cc7b4b 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -383,40 +383,38 @@ bool MultipleAccessStorage::isRestoreAllowed() const } -std::vector> MultipleAccessStorage::readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const +void MultipleAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const { - std::vector> res; auto storages = getStoragesInternal(); - size_t count = 0; + bool allowed = false; for (const auto & storage : *storages) { if (storage->isBackupAllowed()) { - insertAtEnd(res, storage->readAllForBackup(type, backup_settings)); - ++count; + storage->backup(backup_entries_collector, data_path_in_backup, type); + 
allowed = true; } } - if (!count) + if (!allowed) throwBackupNotAllowed(); - - return res; } - -void MultipleAccessStorage::insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) +void MultipleAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) { auto storages = getStoragesInternal(); + for (const auto & storage : *storages) { if (storage->isRestoreAllowed()) { - storage->insertFromBackup(entities_from_backup, restore_settings, restore_coordination); + storage->restoreFromBackup(restorer); return; } } - throwRestoreNotAllowed(); + + throwBackupNotAllowed(); } } diff --git a/src/Access/MultipleAccessStorage.h b/src/Access/MultipleAccessStorage.h index 2eacdafd3f3..58cf09fd0ff 100644 --- a/src/Access/MultipleAccessStorage.h +++ b/src/Access/MultipleAccessStorage.h @@ -45,8 +45,8 @@ public: bool isBackupAllowed() const override; bool isRestoreAllowed() const override; - std::vector> readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const override; - void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; + void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override; + void restoreFromBackup(RestorerFromBackup & restorer) override; protected: std::optional findImpl(AccessEntityType type, const String & name) const override; diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index 6a9d716c2f9..f6c8d0a7153 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -2,10 +2,14 @@ #include #include #include +#include +#include +#include #include +#include #include #include -#include +#include #include #include #include @@ -13,6 +17,7 @@ #include #include #include +#include namespace DB @@ -613,19 +618,64 @@ AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id, bool throw_if return entry.entity; } -void ReplicatedAccessStorage::insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) + +void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const +{ + if (!isBackupAllowed()) + throwBackupNotAllowed(); + + auto entities = readAllWithIDs(type); + boost::range::remove_erase_if(entities, [](const std::pair & x) { return !x.second->isBackupAllowed(); }); + + auto backup_entry_with_path = makeBackupEntryForAccess( + entities, + data_path_in_backup, + backup_entries_collector.getAccessCounter(type), + backup_entries_collector.getContext()->getAccessControl()); + + auto backup_coordination = backup_entries_collector.getBackupCoordination(); + backup_coordination->addReplicatedAccessPath(zookeeper_path, backup_entry_with_path.first); + String current_host_id = backup_entries_collector.getBackupSettings().host_id; + backup_coordination->setReplicatedAccessHost(zookeeper_path, current_host_id); + + backup_entries_collector.addPostTask( + [backup_entry = backup_entry_with_path.second, + zookeeper_path = zookeeper_path, + current_host_id, + &backup_entries_collector, + backup_coordination] + { + if (current_host_id != backup_coordination->getReplicatedAccessHost(zookeeper_path)) + return; + + for (const String & path : 
backup_coordination->getReplicatedAccessPaths(zookeeper_path)) + backup_entries_collector.addBackupEntry(path, backup_entry); + }); +} + + +void ReplicatedAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) { if (!isRestoreAllowed()) throwRestoreNotAllowed(); + auto restore_coordination = restorer.getRestoreCoordination(); if (!restore_coordination->acquireReplicatedAccessStorage(zookeeper_path)) return; - bool replace_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kReplace); - bool throw_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kCreate); + auto entities = restorer.getAccessEntitiesToRestore(); + if (entities.empty()) + return; - for (const auto & [id, entity] : entities_from_backup) - insertWithID(id, entity, replace_if_exists, throw_if_exists); + auto create_access = restorer.getRestoreSettings().create_access; + bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); + bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); + + restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] + { + for (const auto & [id, entity] : entities) + insertWithID(id, entity, replace_if_exists, throw_if_exists); + }); } } diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h index 7cccdc1793f..6311e2ac7c0 100644 --- a/src/Access/ReplicatedAccessStorage.h +++ b/src/Access/ReplicatedAccessStorage.h @@ -38,7 +38,8 @@ public: bool exists(const UUID & id) const override; bool isBackupAllowed() const override { return backup_allowed; } - void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; + void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override; + void restoreFromBackup(RestorerFromBackup & restorer) override; private: String zookeeper_path; diff --git a/src/Access/SettingsProfileElement.h b/src/Access/SettingsProfileElement.h index 818e7804a76..a4124826b40 100644 --- a/src/Access/SettingsProfileElement.h +++ b/src/Access/SettingsProfileElement.h @@ -20,6 +20,7 @@ class AccessControl; struct SettingsProfileElement { std::optional parent_profile; + String setting_name; Field value; Field min_value; diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 4561d7f5766..1d755fdf1da 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -26,6 +26,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int UNKNOWN_ADDRESS_PATTERN_TYPE; + extern const int THERE_IS_NO_PROFILE; extern const int NOT_IMPLEMENTED; } @@ -47,7 +48,7 @@ namespace UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getName()); } - UserPtr parseUser(const Poco::Util::AbstractConfiguration & config, const String & user_name, bool allow_no_password, bool allow_plaintext_password) + UserPtr parseUser(const Poco::Util::AbstractConfiguration & config, const String & user_name, const std::unordered_set & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password) { auto user = std::make_shared(); user->setName(user_name); @@ -140,8 +141,11 @@ namespace if (config.has(profile_name_config)) { auto profile_name = config.getString(profile_name_config); + auto profile_id = 
generateID(AccessEntityType::SETTINGS_PROFILE, profile_name); + if (!allowed_profile_ids.contains(profile_id)) + throw Exception(ErrorCodes::THERE_IS_NO_PROFILE, "Profile {} was not found", profile_name); SettingsProfileElement profile_element; - profile_element.parent_profile = generateID(AccessEntityType::SETTINGS_PROFILE, profile_name); + profile_element.parent_profile = profile_id; user->settings.push_back(std::move(profile_element)); } @@ -231,7 +235,7 @@ namespace } - std::vector parseUsers(const Poco::Util::AbstractConfiguration & config, bool allow_no_password, bool allow_plaintext_password) + std::vector parseUsers(const Poco::Util::AbstractConfiguration & config, const std::unordered_set & allowed_profile_ids, bool allow_no_password, bool allow_plaintext_password) { Poco::Util::AbstractConfiguration::Keys user_names; config.keys("users", user_names); @@ -242,7 +246,7 @@ namespace { try { - users.push_back(parseUser(config, user_name, allow_no_password, allow_plaintext_password)); + users.push_back(parseUser(config, user_name, allowed_profile_ids, allow_no_password, allow_plaintext_password)); } catch (Exception & e) { @@ -457,6 +461,7 @@ namespace std::shared_ptr parseSettingsProfile( const Poco::Util::AbstractConfiguration & config, const String & profile_name, + const std::unordered_set & allowed_parent_profile_ids, const AccessControl & access_control) { auto profile = std::make_shared(); @@ -471,8 +476,11 @@ namespace if (key == "profile" || key.starts_with("profile[")) { String parent_profile_name = config.getString(profile_config + "." + key); + auto parent_profile_id = generateID(AccessEntityType::SETTINGS_PROFILE, parent_profile_name); + if (!allowed_parent_profile_ids.contains(parent_profile_id)) + throw Exception(ErrorCodes::THERE_IS_NO_PROFILE, "Parent profile '{}' was not found", parent_profile_name); SettingsProfileElement profile_element; - profile_element.parent_profile = generateID(AccessEntityType::SETTINGS_PROFILE, parent_profile_name); + profile_element.parent_profile = parent_profile_id; profile->elements.emplace_back(std::move(profile_element)); continue; } @@ -498,6 +506,7 @@ namespace std::vector parseSettingsProfiles( const Poco::Util::AbstractConfiguration & config, + const std::unordered_set & allowed_parent_profile_ids, const AccessControl & access_control) { Poco::Util::AbstractConfiguration::Keys profile_names; @@ -510,7 +519,7 @@ namespace { try { - profiles.push_back(parseSettingsProfile(config, profile_name, access_control)); + profiles.push_back(parseSettingsProfile(config, profile_name, allowed_parent_profile_ids, access_control)); } catch (Exception & e) { @@ -521,6 +530,17 @@ namespace return profiles; } + + + std::unordered_set getAllowedSettingsProfileIDs(const Poco::Util::AbstractConfiguration & config) + { + Poco::Util::AbstractConfiguration::Keys profile_names; + config.keys("profiles", profile_names); + std::unordered_set ids; + for (const auto & profile_name : profile_names) + ids.emplace(generateID(AccessEntityType::SETTINGS_PROFILE, profile_name)); + return ids; + } } UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_, bool allow_backup_) @@ -569,16 +589,18 @@ void UsersConfigAccessStorage::parseFromConfig(const Poco::Util::AbstractConfigu { try { + auto allowed_profile_ids = getAllowedSettingsProfileIDs(config); bool no_password_allowed = access_control.isNoPasswordAllowed(); bool plaintext_password_allowed = access_control.isPlaintextPasswordAllowed(); + std::vector> 
all_entities; - for (const auto & entity : parseUsers(config, no_password_allowed, plaintext_password_allowed)) + for (const auto & entity : parseUsers(config, allowed_profile_ids, no_password_allowed, plaintext_password_allowed)) all_entities.emplace_back(generateID(*entity), entity); for (const auto & entity : parseQuotas(config)) all_entities.emplace_back(generateID(*entity), entity); for (const auto & entity : parseRowPolicies(config, access_control.isEnabledUsersWithoutRowPoliciesCanReadRows())) all_entities.emplace_back(generateID(*entity), entity); - for (const auto & entity : parseSettingsProfiles(config, access_control)) + for (const auto & entity : parseSettingsProfiles(config, allowed_profile_ids, access_control)) all_entities.emplace_back(generateID(*entity), entity); memory_storage.setAll(all_entities); } diff --git a/src/AggregateFunctions/AggregateFunctionMap.h b/src/AggregateFunctions/AggregateFunctionMap.h index 8d77e22300b..5ccc9041c36 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.h +++ b/src/AggregateFunctions/AggregateFunctionMap.h @@ -169,12 +169,21 @@ public: { const auto & it = merged_maps.find(elem.first); - if (it != merged_maps.end()) + AggregateDataPtr nested_place; + if (it == merged_maps.end()) { - nested_func->merge(it->second, elem.second, arena); + // elem.second cannot be copied since this it will be destroyed after merging, + // and lead to use-after-free. + nested_place = arena->alignedAlloc(nested_func->sizeOfData(), nested_func->alignOfData()); + nested_func->create(nested_place); + merged_maps.emplace(elem.first, nested_place); } else - merged_maps[elem.first] = elem.second; + { + nested_place = it->second; + } + + nested_func->merge(nested_place, elem.second, arena); } } diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h index a9bf8254f35..670dd5948f7 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h @@ -31,8 +31,8 @@ struct RankCorrelationData : public StatisticalSample RanksArray ranks_y; std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y); - /// In our case sizes of both samples are equal. - const auto size = this->size_x; + /// Sizes can be non-equal due to skipped NaNs. + const auto size = std::min(this->size_x, this->size_y); /// Count d^2 sum Float64 answer = 0; diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 16c9c6c6b57..51020abe826 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -70,9 +70,9 @@ namespace detail } // Unlike HashTableGrower always grows to power of 2. -struct UniqCombinedHashTableGrower : public HashTableGrower<> +struct UniqCombinedHashTableGrower : public HashTableGrowerWithPrecalculation<> { - void increaseSize() { ++size_degree; } + void increaseSize() { increaseSizeDegree(1); } }; template diff --git a/src/AggregateFunctions/StatCommon.h b/src/AggregateFunctions/StatCommon.h index 29163b63f77..ff824ca11b8 100644 --- a/src/AggregateFunctions/StatCommon.h +++ b/src/AggregateFunctions/StatCommon.h @@ -31,8 +31,8 @@ std::pair computeRanksAndTieCorrection(const Values & value /// Save initial positions, than sort indices according to the values. 
std::vector indexes(size); std::iota(indexes.begin(), indexes.end(), 0); - ::sort(indexes.begin(), indexes.end(), - [&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; }); + std::sort(indexes.begin(), indexes.end(), + [&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; }); size_t left = 0; Float64 tie_numenator = 0; @@ -74,12 +74,18 @@ struct StatisticalSample void addX(X value, Arena * arena) { + if (isNaN(value)) + return; + ++size_x; x.push_back(value, arena); } void addY(Y value, Arena * arena) { + if (isNaN(value)) + return; + ++size_y; y.push_back(value, arena); } diff --git a/src/Backups/BackupCoordinationDistributed.cpp b/src/Backups/BackupCoordinationDistributed.cpp index 945239482fc..5b932229e71 100644 --- a/src/Backups/BackupCoordinationDistributed.cpp +++ b/src/Backups/BackupCoordinationDistributed.cpp @@ -131,7 +131,7 @@ namespace BackupCoordinationDistributed::BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_) : zookeeper_path(zookeeper_path_) , get_zookeeper(get_zookeeper_) - , stage_sync(zookeeper_path_ + "/stage", get_zookeeper_, &Poco::Logger::get("BackupCoordination")) + , status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("BackupCoordination")) { createRootNodes(); } @@ -145,6 +145,8 @@ void BackupCoordinationDistributed::createRootNodes() zookeeper->createIfNotExists(zookeeper_path, ""); zookeeper->createIfNotExists(zookeeper_path + "/repl_part_names", ""); zookeeper->createIfNotExists(zookeeper_path + "/repl_data_paths", ""); + zookeeper->createIfNotExists(zookeeper_path + "/repl_access_host", ""); + zookeeper->createIfNotExists(zookeeper_path + "/repl_access_paths", ""); zookeeper->createIfNotExists(zookeeper_path + "/file_names", ""); zookeeper->createIfNotExists(zookeeper_path + "/file_infos", ""); zookeeper->createIfNotExists(zookeeper_path + "/archive_suffixes", ""); @@ -157,19 +159,24 @@ void BackupCoordinationDistributed::removeAllNodes() } -void BackupCoordinationDistributed::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) +void BackupCoordinationDistributed::setStatus(const String & current_host, const String & new_status, const String & message) { - stage_sync.syncStage(current_host, new_stage, wait_hosts, timeout); + status_sync.set(current_host, new_status, message); } -void BackupCoordinationDistributed::syncStageError(const String & current_host, const String & error_message) +Strings BackupCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) { - stage_sync.syncStageError(current_host, error_message); + return status_sync.setAndWait(current_host, new_status, message, all_hosts); +} + +Strings BackupCoordinationDistributed::setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) +{ + return status_sync.setAndWaitFor(current_host, new_status, message, all_hosts, timeout_ms); } void BackupCoordinationDistributed::addReplicatedPartNames( - const String & table_zk_path, + const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) @@ -181,39 +188,39 @@ void BackupCoordinationDistributed::addReplicatedPartNames( } auto zookeeper = get_zookeeper(); - String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_zk_path); + 
String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_shared_id); zookeeper->createIfNotExists(path, ""); path += "/" + escapeForFileName(replica_name); zookeeper->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent); } -Strings BackupCoordinationDistributed::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const +Strings BackupCoordinationDistributed::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const { std::lock_guard lock{mutex}; prepareReplicatedPartNames(); - return replicated_part_names->getPartNames(table_zk_path, replica_name); + return replicated_part_names->getPartNames(table_shared_id, replica_name); } void BackupCoordinationDistributed::addReplicatedDataPath( - const String & table_zk_path, const String & data_path) + const String & table_shared_id, const String & data_path) { auto zookeeper = get_zookeeper(); - String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_zk_path); + String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id); + zookeeper->createIfNotExists(path, ""); + path += "/" + escapeForFileName(data_path); zookeeper->createIfNotExists(path, ""); - path += "/"; - zookeeper->create(path, data_path, zkutil::CreateMode::PersistentSequential); } -Strings BackupCoordinationDistributed::getReplicatedDataPaths(const String & table_zk_path) const +Strings BackupCoordinationDistributed::getReplicatedDataPaths(const String & table_shared_id) const { auto zookeeper = get_zookeeper(); - String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_zk_path); + String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id); Strings children = zookeeper->getChildren(path); Strings data_paths; data_paths.reserve(children.size()); for (const String & child : children) - data_paths.push_back(zookeeper->get(path + "/" + child)); + data_paths.push_back(unescapeForFileName(child)); return data_paths; } @@ -240,6 +247,47 @@ void BackupCoordinationDistributed::prepareReplicatedPartNames() const } +void BackupCoordinationDistributed::addReplicatedAccessPath(const String & access_zk_path, const String & file_path) +{ + auto zookeeper = get_zookeeper(); + String path = zookeeper_path + "/repl_access_paths/" + escapeForFileName(access_zk_path); + zookeeper->createIfNotExists(path, ""); + path += "/" + escapeForFileName(file_path); + zookeeper->createIfNotExists(path, ""); +} + +Strings BackupCoordinationDistributed::getReplicatedAccessPaths(const String & access_zk_path) const +{ + auto zookeeper = get_zookeeper(); + String path = zookeeper_path + "/repl_access_paths/" + escapeForFileName(access_zk_path); + Strings children = zookeeper->getChildren(path); + Strings file_paths; + file_paths.reserve(children.size()); + for (const String & child : children) + file_paths.push_back(unescapeForFileName(child)); + return file_paths; +} + +void BackupCoordinationDistributed::setReplicatedAccessHost(const String & access_zk_path, const String & host_id) +{ + auto zookeeper = get_zookeeper(); + String path = zookeeper_path + "/repl_access_host/" + escapeForFileName(access_zk_path); + auto code = zookeeper->tryCreate(path, host_id, zkutil::CreateMode::Persistent); + if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS)) + throw zkutil::KeeperException(code, path); + + if (code == Coordination::Error::ZNODEEXISTS) + 
zookeeper->set(path, host_id); +} + +String BackupCoordinationDistributed::getReplicatedAccessHost(const String & access_zk_path) const +{ + auto zookeeper = get_zookeeper(); + String path = zookeeper_path + "/repl_access_host/" + escapeForFileName(access_zk_path); + return zookeeper->get(path); +} + + void BackupCoordinationDistributed::addFileInfo(const FileInfo & file_info, bool & is_data_file_required) { auto zookeeper = get_zookeeper(); diff --git a/src/Backups/BackupCoordinationDistributed.h b/src/Backups/BackupCoordinationDistributed.h index 2872e1f3ae4..813132bd0b8 100644 --- a/src/Backups/BackupCoordinationDistributed.h +++ b/src/Backups/BackupCoordinationDistributed.h @@ -14,19 +14,26 @@ public: BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_); ~BackupCoordinationDistributed() override; - void syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) override; - void syncStageError(const String & current_host, const String & error_message) override; + void setStatus(const String & current_host, const String & new_status, const String & message) override; + Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override; + Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; void addReplicatedPartNames( - const String & table_zk_path, + const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) override; - Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override; + Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const override; - void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override; - Strings getReplicatedDataPaths(const String & table_zk_path) const override; + void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override; + Strings getReplicatedDataPaths(const String & table_shared_id) const override; + + void addReplicatedAccessPath(const String & access_zk_path, const String & file_path) override; + Strings getReplicatedAccessPaths(const String & access_zk_path) const override; + + void setReplicatedAccessHost(const String & access_zk_path, const String & host_id) override; + String getReplicatedAccessHost(const String & access_zk_path) const override; void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override; void updateFileInfo(const FileInfo & file_info) override; @@ -51,7 +58,7 @@ private: const String zookeeper_path; const zkutil::GetZooKeeper get_zookeeper; - BackupCoordinationStageSync stage_sync; + BackupCoordinationStatusSync status_sync; mutable std::mutex mutex; mutable std::optional replicated_part_names; diff --git a/src/Backups/BackupCoordinationHelpers.cpp b/src/Backups/BackupCoordinationHelpers.cpp index 9528f888770..7f570ba9c85 100644 --- a/src/Backups/BackupCoordinationHelpers.cpp +++ b/src/Backups/BackupCoordinationHelpers.cpp @@ -157,7 +157,7 @@ BackupCoordinationReplicatedPartNames::BackupCoordinationReplicatedPartNames() = BackupCoordinationReplicatedPartNames::~BackupCoordinationReplicatedPartNames() = default; void BackupCoordinationReplicatedPartNames::addPartNames( - const String & table_zk_path, 
+ const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) @@ -165,7 +165,7 @@ void BackupCoordinationReplicatedPartNames::addPartNames( if (part_names_prepared) throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after getPartNames()"); - auto & table_info = table_infos[table_zk_path]; + auto & table_info = table_infos[table_shared_id]; if (!table_info.covered_parts_finder) table_info.covered_parts_finder = std::make_unique(table_name_for_logs); @@ -207,10 +207,10 @@ void BackupCoordinationReplicatedPartNames::addPartNames( } } -Strings BackupCoordinationReplicatedPartNames::getPartNames(const String & table_zk_path, const String & replica_name) const +Strings BackupCoordinationReplicatedPartNames::getPartNames(const String & table_shared_id, const String & replica_name) const { preparePartNames(); - auto it = table_infos.find(table_zk_path); + auto it = table_infos.find(table_shared_id); if (it == table_infos.end()) return {}; const auto & replicas_parts = it->second.replicas_parts; @@ -243,7 +243,7 @@ void BackupCoordinationReplicatedPartNames::preparePartNames() const /// Helps to wait until all hosts come to a specified stage. -BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_) +BackupCoordinationStatusSync::BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_) : zookeeper_path(zookeeper_path_) , get_zookeeper(get_zookeeper_) , log(log_) @@ -251,63 +251,78 @@ BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeepe createRootNodes(); } -void BackupCoordinationStageSync::createRootNodes() +void BackupCoordinationStatusSync::createRootNodes() { auto zookeeper = get_zookeeper(); zookeeper->createAncestors(zookeeper_path); zookeeper->createIfNotExists(zookeeper_path, ""); } -void BackupCoordinationStageSync::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) +void BackupCoordinationStatusSync::set(const String & current_host, const String & new_status, const String & message) { - /// Put new stage to ZooKeeper. - auto zookeeper = get_zookeeper(); - zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + std::to_string(new_stage), ""); + setImpl(current_host, new_status, message, {}, {}); +} - if (wait_hosts.empty() || ((wait_hosts.size() == 1) && (wait_hosts.front() == current_host))) - return; +Strings BackupCoordinationStatusSync::setAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) +{ + return setImpl(current_host, new_status, message, all_hosts, {}); +} + +Strings BackupCoordinationStatusSync::setAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) +{ + return setImpl(current_host, new_status, message, all_hosts, timeout_ms); +} + +Strings BackupCoordinationStatusSync::setImpl(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, const std::optional & timeout_ms) +{ + /// Put new status to ZooKeeper. 
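    /// Status layout in ZooKeeper (as implemented just below): each host publishes a child node of
    /// `zookeeper_path` named "<host>|<new_status>" whose value is a free-form message; for the
    /// special "error" status the value is the error text. Hosts then discover each other's progress
    /// by listing these children and splitting each node name on the first '|'.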
+ auto zookeeper = get_zookeeper(); + zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + new_status, message); + + if (all_hosts.empty() || (new_status == kErrorStatus)) + return {}; + + if ((all_hosts.size() == 1) && (all_hosts.front() == current_host)) + return {message}; /// Wait for other hosts. - /// Current stages of all hosts. + Strings ready_hosts_results; + ready_hosts_results.resize(all_hosts.size()); + + std::map /* index in `ready_hosts_results` */> unready_hosts; + for (size_t i = 0; i != all_hosts.size(); ++i) + unready_hosts[all_hosts[i]].push_back(i); + std::optional host_with_error; std::optional error_message; - std::map> unready_hosts; - for (const String & host : wait_hosts) - unready_hosts.emplace(host, std::optional{}); - /// Process ZooKeeper's nodes and set `all_hosts_ready` or `unready_host` or `error_message`. auto process_zk_nodes = [&](const Strings & zk_nodes) { for (const String & zk_node : zk_nodes) { - if (zk_node == "error") + if (zk_node.starts_with("remove_watch-")) + continue; + + size_t separator_pos = zk_node.find('|'); + if (separator_pos == String::npos) + throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node); + String host = zk_node.substr(0, separator_pos); + String status = zk_node.substr(separator_pos + 1); + if (status == kErrorStatus) { - String str = zookeeper->get(zookeeper_path + "/" + zk_node); - size_t separator_pos = str.find('|'); - if (separator_pos == String::npos) - throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected value of zk node {}: {}", zookeeper_path + "/" + zk_node, str); - host_with_error = str.substr(0, separator_pos); - error_message = str.substr(separator_pos + 1); + host_with_error = host; + error_message = zookeeper->get(zookeeper_path + "/" + zk_node); return; } - else if (!zk_node.starts_with("remove_watch-")) + auto it = unready_hosts.find(host); + if ((it != unready_hosts.end()) && (status == new_status)) { - size_t separator_pos = zk_node.find('|'); - if (separator_pos == String::npos) - throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node); - String host = zk_node.substr(0, separator_pos); - int found_stage = parseFromString(zk_node.substr(separator_pos + 1)); - auto it = unready_hosts.find(host); - if (it != unready_hosts.end()) - { - auto & stage = it->second; - if (!stage || (stage < found_stage)) - stage = found_stage; - if (stage >= new_stage) - unready_hosts.erase(it); - } + String result = zookeeper->get(zookeeper_path + "/" + zk_node); + for (size_t i : it->second) + ready_hosts_results[i] = result; + unready_hosts.erase(it); } } }; @@ -324,7 +339,8 @@ void BackupCoordinationStageSync::syncStage(const String & current_host, int new auto watch_triggered = [&] { return !watch_set; }; - bool use_timeout = (timeout.count() >= 0); + bool use_timeout = timeout_ms.has_value(); + std::chrono::milliseconds timeout{timeout_ms.value_or(0)}; std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); std::chrono::steady_clock::duration elapsed; std::mutex dummy_mutex; @@ -369,12 +385,8 @@ void BackupCoordinationStageSync::syncStage(const String & current_host, int new unready_hosts.begin()->first, to_string(elapsed)); } -} -void BackupCoordinationStageSync::syncStageError(const String & current_host, const String & error_message) -{ - auto zookeeper = get_zookeeper(); - zookeeper->createIfNotExists(zookeeper_path + 
"/error", current_host + "|" + error_message); + return ready_hosts_results; } } diff --git a/src/Backups/BackupCoordinationHelpers.h b/src/Backups/BackupCoordinationHelpers.h index b0cd0440b98..2e9e4b3cbde 100644 --- a/src/Backups/BackupCoordinationHelpers.h +++ b/src/Backups/BackupCoordinationHelpers.h @@ -24,7 +24,7 @@ public: /// getPartNames(). /// Checksums are used only to control that parts under the same names on different replicas are the same. void addPartNames( - const String & table_zk_path, + const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums); @@ -32,7 +32,7 @@ public: /// Returns the names of the parts which a specified replica of a replicated table should put to the backup. /// This is the same list as it was added by call of the function addPartNames() but without duplications and without /// parts covered by another parts. - Strings getPartNames(const String & table_zk_path, const String & replica_name) const; + Strings getPartNames(const String & table_shared_id, const String & replica_name) const; private: void preparePartNames() const; @@ -52,22 +52,26 @@ private: std::unique_ptr covered_parts_finder; }; - std::map table_infos; /// Should be ordered because we need this map to be in the same order on every replica. + std::map table_infos; /// Should be ordered because we need this map to be in the same order on every replica. mutable bool part_names_prepared = false; }; /// Helps to wait until all hosts come to a specified stage. -class BackupCoordinationStageSync +class BackupCoordinationStatusSync { public: - BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_); + BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_); - void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout); - void syncStageError(const String & current_host, const String & error_message); + void set(const String & current_host, const String & new_status, const String & message); + Strings setAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts); + Strings setAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms); + + static constexpr const char * kErrorStatus = "error"; private: void createRootNodes(); + Strings setImpl(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, const std::optional & timeout_ms); String zookeeper_path; zkutil::GetZooKeeper get_zookeeper; diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index 55a3c671a6e..a7d5602ca30 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -13,43 +13,80 @@ using FileInfo = IBackupCoordination::FileInfo; BackupCoordinationLocal::BackupCoordinationLocal() = default; BackupCoordinationLocal::~BackupCoordinationLocal() = default; -void BackupCoordinationLocal::syncStage(const String &, int, const Strings &, std::chrono::seconds) +void BackupCoordinationLocal::setStatus(const String &, const String &, const String &) { } -void BackupCoordinationLocal::syncStageError(const String &, const String &) +Strings BackupCoordinationLocal::setStatusAndWait(const String &, const String &, const 
String &, const Strings &) { + return {}; } -void BackupCoordinationLocal::addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) +Strings BackupCoordinationLocal::setStatusAndWaitFor(const String &, const String &, const String &, const Strings &, UInt64) +{ + return {}; +} + +void BackupCoordinationLocal::addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) { std::lock_guard lock{mutex}; - replicated_part_names.addPartNames(table_zk_path, table_name_for_logs, replica_name, part_names_and_checksums); + replicated_part_names.addPartNames(table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums); } -Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const +Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const { std::lock_guard lock{mutex}; - return replicated_part_names.getPartNames(table_zk_path, replica_name); + return replicated_part_names.getPartNames(table_shared_id, replica_name); } -void BackupCoordinationLocal::addReplicatedDataPath(const String & table_zk_path, const String & data_path) +void BackupCoordinationLocal::addReplicatedDataPath(const String & table_shared_id, const String & data_path) { std::lock_guard lock{mutex}; - replicated_data_paths[table_zk_path].push_back(data_path); + replicated_data_paths[table_shared_id].push_back(data_path); } -Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_zk_path) const +Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_shared_id) const { std::lock_guard lock{mutex}; - auto it = replicated_data_paths.find(table_zk_path); + auto it = replicated_data_paths.find(table_shared_id); if (it == replicated_data_paths.end()) return {}; return it->second; } +void BackupCoordinationLocal::addReplicatedAccessPath(const String & access_zk_path, const String & file_path) +{ + std::lock_guard lock{mutex}; + replicated_access_paths[access_zk_path].push_back(file_path); +} + +Strings BackupCoordinationLocal::getReplicatedAccessPaths(const String & access_zk_path) const +{ + std::lock_guard lock{mutex}; + auto it = replicated_access_paths.find(access_zk_path); + if (it == replicated_access_paths.end()) + return {}; + return it->second; +} + +void BackupCoordinationLocal::setReplicatedAccessHost(const String & access_zk_path, const String & host_id) +{ + std::lock_guard lock{mutex}; + replicated_access_hosts[access_zk_path] = host_id; +} + +String BackupCoordinationLocal::getReplicatedAccessHost(const String & access_zk_path) const +{ + std::lock_guard lock{mutex}; + auto it = replicated_access_hosts.find(access_zk_path); + if (it == replicated_access_hosts.end()) + return {}; + return it->second; +} + + void BackupCoordinationLocal::addFileInfo(const FileInfo & file_info, bool & is_data_file_required) { std::lock_guard lock{mutex}; diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index 6529184c61a..dcd6505a438 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -19,15 +19,22 @@ public: BackupCoordinationLocal(); ~BackupCoordinationLocal() override; - void syncStage(const String & current_host, int stage, const Strings & wait_hosts, 
std::chrono::seconds timeout) override; - void syncStageError(const String & current_host, const String & error_message) override; + void setStatus(const String & current_host, const String & new_status, const String & message) override; + Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override; + Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; - void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, + void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) override; - Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override; + Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const override; - void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override; - Strings getReplicatedDataPaths(const String & table_zk_path) const override; + void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override; + Strings getReplicatedDataPaths(const String & table_shared_id) const override; + + void addReplicatedAccessPath(const String & access_zk_path, const String & file_path) override; + Strings getReplicatedAccessPaths(const String & access_zk_path) const override; + + void setReplicatedAccessHost(const String & access_zk_path, const String & host_id) override; + String getReplicatedAccessHost(const String & access_zk_path) const override; void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override; void updateFileInfo(const FileInfo & file_info) override; @@ -47,6 +54,8 @@ private: mutable std::mutex mutex; BackupCoordinationReplicatedPartNames replicated_part_names TSA_GUARDED_BY(mutex); std::unordered_map replicated_data_paths TSA_GUARDED_BY(mutex); + std::unordered_map replicated_access_paths TSA_GUARDED_BY(mutex); + std::unordered_map replicated_access_hosts TSA_GUARDED_BY(mutex); std::map file_names TSA_GUARDED_BY(mutex); /// Should be ordered alphabetically, see listFiles(). For empty files we assume checksum = 0. std::map file_infos TSA_GUARDED_BY(mutex); /// Information about files. Without empty files. Strings archive_suffixes TSA_GUARDED_BY(mutex); diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 322bc00ee3c..d5ed9e0da2b 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -1,14 +1,18 @@ #include #include #include +#include #include +#include #include #include #include #include #include +#include #include #include +#include #include #include #include @@ -21,35 +25,63 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_COLLECT_OBJECTS_FOR_BACKUP; + extern const int INCONSISTENT_METADATA_FOR_BACKUP; extern const int CANNOT_BACKUP_TABLE; extern const int TABLE_IS_DROPPED; + extern const int UNKNOWN_TABLE; extern const int LOGICAL_ERROR; } - -bool BackupEntriesCollector::TableKey::operator ==(const TableKey & right) const +namespace { - return (name == right.name) && (is_temporary == right.is_temporary); -} + /// Finding all tables and databases which we're going to put to the backup and collecting their metadata. 
+ constexpr const char * kGatheringMetadataStatus = "gathering metadata"; -bool BackupEntriesCollector::TableKey::operator <(const TableKey & right) const -{ - return (name < right.name) || ((name == right.name) && (is_temporary < right.is_temporary)); -} + /// Making temporary hard links and prepare backup entries. + constexpr const char * kExtractingDataFromTablesStatus = "extracting data from tables"; -std::string_view BackupEntriesCollector::toString(Stage stage) -{ - switch (stage) + /// Running special tasks for replicated tables which can also prepare some backup entries. + constexpr const char * kRunningPostTasksStatus = "running post-tasks"; + + /// Writing backup entries to the backup and removing temporary hard links. + constexpr const char * kWritingBackupStatus = "writing backup"; + + /// Error status. + constexpr const char * kErrorStatus = BackupCoordinationStatusSync::kErrorStatus; + + /// Uppercases the first character of a passed string. + String toUpperFirst(const String & str) { - case Stage::kPreparing: return "Preparing"; - case Stage::kFindingTables: return "Finding tables"; - case Stage::kExtractingDataFromTables: return "Extracting data from tables"; - case Stage::kRunningPostTasks: return "Running post tasks"; - case Stage::kWritingBackup: return "Writing backup"; - case Stage::kError: return "Error"; + String res = str; + res[0] = std::toupper(res[0]); + return res; + } + + /// Outputs "table " or "temporary table " + String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper) + { + String str; + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + str = fmt::format("temporary table {}", backQuoteIfNeed(table_name)); + else + str = fmt::format("table {}.{}", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); + if (first_upper) + str[0] = std::toupper(str[0]); + return str; + } + + /// How long we should sleep after finding an inconsistency error. + std::chrono::milliseconds getSleepTimeAfterInconsistencyError(size_t pass) + { + size_t ms; + if (pass == 1) /* pass is 1-based */ + ms = 0; + else if ((pass % 10) != 1) + ms = 0; + else + ms = 1000; + return std::chrono::milliseconds{ms}; } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup stage: {}", static_cast(stage)); } @@ -57,36 +89,38 @@ BackupEntriesCollector::BackupEntriesCollector( const ASTBackupQuery::Elements & backup_query_elements_, const BackupSettings & backup_settings_, std::shared_ptr backup_coordination_, - const ContextPtr & context_, - std::chrono::seconds timeout_) + const ContextPtr & context_) : backup_query_elements(backup_query_elements_) , backup_settings(backup_settings_) , backup_coordination(backup_coordination_) , context(context_) - , timeout(timeout_) + , consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 300000)) , log(&Poco::Logger::get("BackupEntriesCollector")) { } BackupEntriesCollector::~BackupEntriesCollector() = default; -BackupEntries BackupEntriesCollector::getBackupEntries() +BackupEntries BackupEntriesCollector::run() { try { - /// getBackupEntries() must not be called multiple times. - if (current_stage != Stage::kPreparing) + /// run() can be called onle once. + if (!current_status.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries"); - /// Calculate the root path for collecting backup entries, it's either empty or has the format "shards//replicas//". 
- calculateRootPathInBackup(); + /// Find other hosts working along with us to execute this ON CLUSTER query. + all_hosts + = BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num); /// Do renaming in the create queries according to the renaming config. renaming_map = makeRenamingMapFromBackupQuery(backup_query_elements); + /// Calculate the root path for collecting backup entries, it's either empty or has the format "shards//replicas//". + calculateRootPathInBackup(); + /// Find databases and tables which we're going to put to the backup. - setStage(Stage::kFindingTables); - collectDatabasesAndTablesInfo(); + gatherMetadataAndCheckConsistency(); /// Make backup entries for the definitions of the found databases. makeBackupEntriesForDatabasesDefs(); @@ -95,15 +129,15 @@ BackupEntries BackupEntriesCollector::getBackupEntries() makeBackupEntriesForTablesDefs(); /// Make backup entries for the data of the found tables. - setStage(Stage::kExtractingDataFromTables); + setStatus(kExtractingDataFromTablesStatus); makeBackupEntriesForTablesData(); /// Run all the tasks added with addPostCollectingTask(). - setStage(Stage::kRunningPostTasks); - runPostCollectingTasks(); + setStatus(kRunningPostTasksStatus); + runPostTasks(); /// No more backup entries or tasks are allowed after this point. - setStage(Stage::kWritingBackup); + setStatus(kWritingBackupStatus); return std::move(backup_entries); } @@ -111,7 +145,7 @@ BackupEntries BackupEntriesCollector::getBackupEntries() { try { - setStage(Stage::kError, getCurrentExceptionMessage(false)); + setStatus(kErrorStatus, getCurrentExceptionMessage(false)); } catch (...) { @@ -120,24 +154,34 @@ BackupEntries BackupEntriesCollector::getBackupEntries() } } -void BackupEntriesCollector::setStage(Stage new_stage, const String & error_message) +Strings BackupEntriesCollector::setStatus(const String & new_status, const String & message) { - if (new_stage == Stage::kError) - LOG_ERROR(log, "{} failed with error: {}", toString(current_stage), error_message); - else - LOG_TRACE(log, "{}", toString(new_stage)); - - current_stage = new_stage; - - if (new_stage == Stage::kError) + if (new_status == kErrorStatus) { - backup_coordination->syncStageError(backup_settings.host_id, error_message); + LOG_ERROR(log, "{} failed with error: {}", toUpperFirst(current_status), message); + backup_coordination->setStatus(backup_settings.host_id, new_status, message); + return {}; } else { - auto all_hosts - = BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num); - backup_coordination->syncStage(backup_settings.host_id, static_cast(new_stage), all_hosts, timeout); + LOG_TRACE(log, "{}", toUpperFirst(new_status)); + current_status = new_status; + if (new_status.starts_with(kGatheringMetadataStatus)) + { + auto now = std::chrono::steady_clock::now(); + auto end_of_timeout = std::max(now, consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout); + + return backup_coordination->setStatusAndWaitFor( + backup_settings.host_id, + new_status, + message, + all_hosts, + std::chrono::duration_cast(end_of_timeout - now).count()); + } + else + { + return backup_coordination->setStatusAndWait(backup_settings.host_id, new_status, message, all_hosts); + } } } @@ -156,287 +200,444 @@ void BackupEntriesCollector::calculateRootPathInBackup() } /// Finds databases and tables which we will put to the backup. 
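The per-pass status strings and the wait budget in setStatus() above deserve a note: the "gathering metadata" statuses encode the pass number, and every pass waits against the same fixed deadline (start time plus backups.consistent_metadata_snapshot_timeout) rather than restarting the clock. A minimal, self-contained sketch of that arithmetic follows; the names are illustrative, not the ClickHouse API.

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <string>

// Builds a status string such as "gathering metadata (3)" for the given 1-based pass.
std::string gatheringMetadataStatus(size_t pass)
{
    return "gathering metadata (" + std::to_string(pass) + ")";
}

// Milliseconds left until the shared metadata-snapshot deadline, clamped at zero.
// The deadline is fixed at start_time + total_timeout, so repeated passes share one budget.
uint64_t remainingWaitMs(std::chrono::steady_clock::time_point start_time,
                         std::chrono::milliseconds total_timeout)
{
    auto now = std::chrono::steady_clock::now();
    auto remaining = std::chrono::duration_cast<std::chrono::milliseconds>((start_time + total_timeout) - now);
    if (remaining.count() < 0)
        remaining = std::chrono::milliseconds{0};
    return static_cast<uint64_t>(remaining.count());
}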
-void BackupEntriesCollector::collectDatabasesAndTablesInfo() +void BackupEntriesCollector::gatherMetadataAndCheckConsistency() { - bool use_timeout = (timeout.count() >= 0); - auto start_time = std::chrono::steady_clock::now(); + consistent_metadata_snapshot_start_time = std::chrono::steady_clock::now(); + auto end_of_timeout = consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout; + setStatus(fmt::format("{} ({})", kGatheringMetadataStatus, 1)); - int pass = 0; - do + for (size_t pass = 1;; ++pass) { - database_infos.clear(); - table_infos.clear(); - consistent = true; - - /// Collect information about databases and tables specified in the BACKUP query. - for (const auto & element : backup_query_elements) + String new_status = fmt::format("{} ({})", kGatheringMetadataStatus, pass + 1); + std::optional inconsistency_error; + if (tryGatherMetadataAndCompareWithPrevious(inconsistency_error)) { - switch (element.type) + /// Gathered metadata and checked consistency, cool! But we have to check that other hosts cope with that too. + auto all_hosts_results = setStatus(new_status, "consistent"); + + std::optional host_with_inconsistency; + std::optional inconsistency_error_on_other_host; + for (size_t i = 0; i != all_hosts.size(); ++i) { - case ASTBackupQuery::ElementType::TABLE: + if ((i < all_hosts_results.size()) && (all_hosts_results[i] != "consistent")) { - collectTableInfo({element.database_name, element.table_name}, false, element.partitions, true); - break; - } - - case ASTBackupQuery::ElementType::TEMPORARY_TABLE: - { - collectTableInfo({"", element.table_name}, true, element.partitions, true); - break; - } - - case ASTBackupQuery::ElementType::DATABASE: - { - collectDatabaseInfo(element.database_name, element.except_tables, true); - break; - } - - case ASTBackupQuery::ElementType::ALL: - { - collectAllDatabasesInfo(element.except_databases, element.except_tables); + host_with_inconsistency = all_hosts[i]; + inconsistency_error_on_other_host = all_hosts_results[i]; break; } } + + if (!host_with_inconsistency) + break; /// All hosts managed to gather metadata and everything is consistent, so we can go further to writing the backup. + + inconsistency_error = Exception{ + ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, + "Found inconsistency on host {}: {}", + *host_with_inconsistency, + *inconsistency_error_on_other_host}; } - - /// We have to check consistency of collected information to protect from the case when some table or database is - /// renamed during this collecting making the collected information invalid. - checkConsistency(); - - /// Two passes is absolute minimum (see `previous_table_names` & `previous_database_names`). - auto elapsed = std::chrono::steady_clock::now() - start_time; - if (!consistent && (pass >= 2) && use_timeout) + else { - if (elapsed > timeout) - throw Exception( - ErrorCodes::CANNOT_COLLECT_OBJECTS_FOR_BACKUP, - "Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})", - pass, - to_string(elapsed)); + /// Failed to gather metadata or something wasn't consistent. We'll let other hosts know that and try again. + setStatus(new_status, inconsistency_error->displayText()); } + /// Two passes is minimum (we need to compare with table names with previous ones to be sure we don't miss anything). 
if (pass >= 2) - LOG_WARNING(log, "Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})", pass, to_string(elapsed)); - ++pass; - } while (!consistent); + { + if (std::chrono::steady_clock::now() > end_of_timeout) + inconsistency_error->rethrow(); + else + LOG_WARNING(log, "{}", inconsistency_error->displayText()); + } + + auto sleep_time = getSleepTimeAfterInconsistencyError(pass); + if (sleep_time.count() > 0) + sleepForNanoseconds(std::chrono::duration_cast(sleep_time).count()); + } LOG_INFO(log, "Will backup {} databases and {} tables", database_infos.size(), table_infos.size()); } -void BackupEntriesCollector::collectTableInfo( - const QualifiedTableName & table_name, bool is_temporary_table, const std::optional & partitions, bool throw_if_not_found) +bool BackupEntriesCollector::tryGatherMetadataAndCompareWithPrevious(std::optional & inconsistency_error) { - /// Gather information about the table. - DatabasePtr database; - StoragePtr storage; - TableLockHolder table_lock; - ASTPtr create_table_query; - - TableKey table_key{table_name, is_temporary_table}; - - if (throw_if_not_found) + try { - auto resolved_id = is_temporary_table - ? context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal) - : context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal); - std::tie(database, storage) = DatabaseCatalog::instance().getDatabaseAndTable(resolved_id, context); - table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); - create_table_query = storage->getCreateQueryForBackup(*this); + /// Collect information about databases and tables specified in the BACKUP query. + database_infos.clear(); + table_infos.clear(); + gatherDatabasesMetadata(); + gatherTablesMetadata(); } - else + catch (Exception & e) { - auto resolved_id = is_temporary_table - ? context->tryResolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal) - : context->tryResolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal); - if (!resolved_id.empty()) - std::tie(database, storage) = DatabaseCatalog::instance().tryGetDatabaseAndTable(resolved_id, context); + if (e.code() != ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP) + throw; + inconsistency_error = e; + return false; + } + + /// We have to check consistency of collected information to protect from the case when some table or database is + /// renamed during this collecting making the collected information invalid. + return compareWithPrevious(inconsistency_error); +} + +void BackupEntriesCollector::gatherDatabasesMetadata() +{ + /// Collect information about databases and tables specified in the BACKUP query. 
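The loop above retries the whole metadata scan until a pass matches the previous one on every host, and getSleepTimeAfterInconsistencyError() keeps those retries cheap: most passes rerun immediately, and only every tenth pass pauses so the log is not flooded while a concurrent rename or drop settles. A compilable restatement of that schedule, assuming the behaviour is exactly as written in the hunk above:

#include <chrono>
#include <cstddef>

// Pass numbers are 1-based. The first pass and any pass whose number is not of the
// form 10*k + 1 retry immediately; passes 11, 21, 31, ... sleep for one second.
std::chrono::milliseconds sleepAfterInconsistency(size_t pass)
{
    if (pass == 1)
        return std::chrono::milliseconds{0};
    if (pass % 10 != 1)
        return std::chrono::milliseconds{0};
    return std::chrono::milliseconds{1000};
}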
+ for (const auto & element : backup_query_elements) + { + switch (element.type) + { + case ASTBackupQuery::ElementType::TABLE: + { + gatherDatabaseMetadata( + element.database_name, + /* throw_if_database_not_found= */ true, + /* backup_create_database_query= */ false, + element.table_name, + /* throw_if_table_not_found= */ true, + element.partitions, + /* all_tables= */ false, + /* except_table_names= */ {}); + break; + } + + case ASTBackupQuery::ElementType::TEMPORARY_TABLE: + { + gatherDatabaseMetadata( + DatabaseCatalog::TEMPORARY_DATABASE, + /* throw_if_database_not_found= */ true, + /* backup_create_database_query= */ false, + element.table_name, + /* throw_if_table_not_found= */ true, + element.partitions, + /* all_tables= */ false, + /* except_table_names= */ {}); + break; + } + + case ASTBackupQuery::ElementType::DATABASE: + { + gatherDatabaseMetadata( + element.database_name, + /* throw_if_database_not_found= */ true, + /* backup_create_database_query= */ true, + /* table_name= */ {}, + /* throw_if_table_not_found= */ false, + /* partitions= */ {}, + /* all_tables= */ true, + /* except_table_names= */ element.except_tables); + break; + } + + case ASTBackupQuery::ElementType::ALL: + { + for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases()) + { + if (!element.except_databases.contains(database_name)) + { + gatherDatabaseMetadata( + database_name, + /* throw_if_database_not_found= */ false, + /* backup_create_database_query= */ true, + /* table_name= */ {}, + /* throw_if_table_not_found= */ false, + /* partitions= */ {}, + /* all_tables= */ true, + /* except_table_names= */ element.except_tables); + } + } + break; + } + } + } +} + +void BackupEntriesCollector::gatherDatabaseMetadata( + const String & database_name, + bool throw_if_database_not_found, + bool backup_create_database_query, + const std::optional & table_name, + bool throw_if_table_not_found, + const std::optional & partitions, + bool all_tables, + const std::set & except_table_names) +{ + auto it = database_infos.find(database_name); + if (it == database_infos.end()) + { + DatabasePtr database; + if (throw_if_database_not_found) + { + database = DatabaseCatalog::instance().getDatabase(database_name); + } + else + { + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + return; + } + + DatabaseInfo new_database_info; + new_database_info.database = database; + it = database_infos.emplace(database_name, new_database_info).first; + } + + DatabaseInfo & database_info = it->second; + + if (backup_create_database_query && !database_info.create_database_query && (database_name != DatabaseCatalog::TEMPORARY_DATABASE)) + { + ASTPtr create_database_query; + try + { + create_database_query = database_info.database->getCreateDatabaseQuery(); + } + catch (...) 
+ { + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for database {}", database_name); + } + + database_info.create_database_query = create_database_query; + const auto & create = create_database_query->as(); + + if (create.getDatabase() != database_name) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for database {}", backQuoteIfNeed(create.getDatabase()), backQuoteIfNeed(database_name)); + + String new_database_name = renaming_map.getNewDatabaseName(database_name); + database_info.metadata_path_in_backup = root_path_in_backup / "metadata" / (escapeForFileName(new_database_name) + ".sql"); + } + + if (table_name) + { + auto & table_params = database_info.tables[*table_name]; + if (throw_if_table_not_found) + table_params.throw_if_table_not_found = true; + if (partitions) + { + table_params.partitions.emplace(); + insertAtEnd(*table_params.partitions, *partitions); + } + database_info.except_table_names.emplace(*table_name); + } + + if (all_tables) + { + database_info.all_tables = all_tables; + for (const auto & except_table_name : except_table_names) + if (except_table_name.first == database_name) + database_info.except_table_names.emplace(except_table_name.second); + } +} + +void BackupEntriesCollector::gatherTablesMetadata() +{ + table_infos.clear(); + for (const auto & [database_name, database_info] : database_infos) + { + const auto & database = database_info.database; + bool is_temporary_database = (database_name == DatabaseCatalog::TEMPORARY_DATABASE); + + auto filter_by_table_name = [database_info = &database_info](const String & table_name) + { + /// We skip inner tables of materialized views. + if (table_name.starts_with(".inner_id.")) + return false; + + if (database_info->tables.contains(table_name)) + return true; + + if (database_info->all_tables) + return !database_info->except_table_names.contains(table_name); + + return false; + }; + + auto db_tables = database->getTablesForBackup(filter_by_table_name, context); + + std::unordered_set found_table_names; + for (const auto & db_table : db_tables) + { + const auto & create_table_query = db_table.first; + const auto & create = create_table_query->as(); + found_table_names.emplace(create.getTable()); + + if (is_temporary_database && !create.temporary) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a non-temporary create query for {}", tableNameWithTypeToString(database_name, create.getTable(), false)); + + if (!is_temporary_database && (create.getDatabase() != database_name)) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected database name {} for {}", backQuoteIfNeed(create.getDatabase()), tableNameWithTypeToString(database_name, create.getTable(), false)); + } + + /// Check that all tables were found. 
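The filter_by_table_name lambda above combines three rules: inner tables of materialized views are never backed up directly, explicitly requested tables are always taken, and for a whole-database backup everything else is taken unless it appears in the EXCEPT TABLES list. A self-contained restatement of that predicate with plain standard containers (the struct and its fields are hypothetical stand-ins for DatabaseInfo):

#include <string>
#include <unordered_set>

struct DatabaseBackupSelection
{
    std::unordered_set<std::string> requested_tables;  // tables named explicitly in the BACKUP query
    bool all_tables = false;                            // true for BACKUP DATABASE / BACKUP ALL
    std::unordered_set<std::string> except_tables;      // the EXCEPT TABLES list
};

bool shouldBackupTable(const DatabaseBackupSelection & selection, const std::string & table_name)
{
    // Inner tables of materialized views are never backed up directly.
    if (table_name.starts_with(".inner_id."))
        return false;

    if (selection.requested_tables.contains(table_name))
        return true;

    if (selection.all_tables)
        return !selection.except_tables.contains(table_name);

    return false;
}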
+ for (const auto & [table_name, table_info] : database_info.tables) + { + if (table_info.throw_if_table_not_found && !found_table_names.contains(table_name)) + throw Exception(ErrorCodes::UNKNOWN_TABLE, "{} not found", tableNameWithTypeToString(database_name, table_name, true)); + } + + for (const auto & db_table : db_tables) + { + const auto & create_table_query = db_table.first; + const auto & storage = db_table.second; + const auto & create = create_table_query->as(); + String table_name = create.getTable(); + + fs::path metadata_path_in_backup, data_path_in_backup; + auto table_name_in_backup = renaming_map.getNewTableName({database_name, table_name}); + if (table_name_in_backup.database == DatabaseCatalog::TEMPORARY_DATABASE) + { + metadata_path_in_backup = root_path_in_backup / "temporary_tables" / "metadata" / (escapeForFileName(table_name_in_backup.table) + ".sql"); + data_path_in_backup = root_path_in_backup / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup.table); + } + else + { + metadata_path_in_backup + = root_path_in_backup / "metadata" / escapeForFileName(table_name_in_backup.database) / (escapeForFileName(table_name_in_backup.table) + ".sql"); + data_path_in_backup = root_path_in_backup / "data" / escapeForFileName(table_name_in_backup.database) + / escapeForFileName(table_name_in_backup.table); + } + + /// Add information to `table_infos`. + auto & res_table_info = table_infos[QualifiedTableName{database_name, table_name}]; + res_table_info.database = database; + res_table_info.storage = storage; + res_table_info.create_table_query = create_table_query; + res_table_info.metadata_path_in_backup = metadata_path_in_backup; + res_table_info.data_path_in_backup = data_path_in_backup; + + if (!backup_settings.structure_only) + { + auto it = database_info.tables.find(table_name); + if (it != database_info.tables.end()) + { + const auto & partitions = it->second.partitions; + if (partitions && !storage->supportsBackupPartition()) + { + throw Exception( + ErrorCodes::CANNOT_BACKUP_TABLE, + "Table engine {} doesn't support partitions, cannot backup {}", + storage->getName(), + tableNameWithTypeToString(database_name, table_name, false)); + } + res_table_info.partitions = partitions; + } + } + } + } +} + +void BackupEntriesCollector::lockTablesForReading() +{ + for (auto & [table_name, table_info] : table_infos) + { + auto storage = table_info.storage; + TableLockHolder table_lock; if (storage) { try { table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); - create_table_query = storage->getCreateQueryForBackup(*this); } catch (Exception & e) { if (e.code() != ErrorCodes::TABLE_IS_DROPPED) throw; - } - } - - if (!create_table_query) - { - consistent &= !table_infos.contains(table_key); - return; - } - } - - fs::path data_path_in_backup; - if (is_temporary_table) - { - auto table_name_in_backup = renaming_map.getNewTemporaryTableName(table_name.table); - data_path_in_backup = root_path_in_backup / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup); - } - else - { - auto table_name_in_backup = renaming_map.getNewTableName(table_name); - data_path_in_backup - = root_path_in_backup / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); - } - - /// Check that information is consistent. 
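The path computation above is worth restating on its own: regular tables are stored under metadata/<db>/<table>.sql and data/<db>/<table> inside the backup, while temporary tables get a dedicated temporary_tables/ subtree keyed by table name only. A small sketch with std::filesystem; escapeForFileName is stubbed out here, whereas the real helper escapes characters that are unsafe in file names:

#include <filesystem>
#include <string>
#include <utility>

namespace fs = std::filesystem;

// Stand-in for ClickHouse's escapeForFileName().
std::string escapeForFileNameStub(const std::string & name) { return name; }

// Returns {metadata_path_in_backup, data_path_in_backup} for a table, following the layout
// in the hunk above; `is_temporary` means the table lives in the temporary database.
std::pair<fs::path, fs::path> backupPathsForTable(
    const fs::path & root_path_in_backup,
    const std::string & database_name,
    const std::string & table_name,
    bool is_temporary)
{
    const auto db = escapeForFileNameStub(database_name);
    const auto table = escapeForFileNameStub(table_name);

    if (is_temporary)
        return {root_path_in_backup / "temporary_tables" / "metadata" / (table + ".sql"),
                root_path_in_backup / "temporary_tables" / "data" / table};

    return {root_path_in_backup / "metadata" / db / (table + ".sql"),
            root_path_in_backup / "data" / db / table};
}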
- const auto & create = create_table_query->as(); - if ((create.getTable() != table_name.table) || (is_temporary_table != create.temporary) || (create.getDatabase() != table_name.database)) - { - /// Table was renamed recently. - consistent = false; - return; - } - - if (auto it = table_infos.find(table_key); it != table_infos.end()) - { - const auto & table_info = it->second; - if ((table_info.database != database) || (table_info.storage != storage)) - { - /// Table was renamed recently. - consistent = false; - return; - } - } - - /// Add information to `table_infos`. - auto & res_table_info = table_infos[table_key]; - res_table_info.database = database; - res_table_info.storage = storage; - res_table_info.table_lock = table_lock; - res_table_info.create_table_query = create_table_query; - res_table_info.data_path_in_backup = data_path_in_backup; - - if (partitions) - { - if (!res_table_info.partitions) - res_table_info.partitions.emplace(); - insertAtEnd(*res_table_info.partitions, *partitions); - } -} - -void BackupEntriesCollector::collectDatabaseInfo(const String & database_name, const std::set & except_table_names, bool throw_if_not_found) -{ - /// Gather information about the database. - DatabasePtr database; - ASTPtr create_database_query; - - if (throw_if_not_found) - { - database = DatabaseCatalog::instance().getDatabase(database_name); - create_database_query = database->getCreateDatabaseQueryForBackup(); - } - else - { - database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - { - consistent &= !database_infos.contains(database_name); - return; - } - - try - { - create_database_query = database->getCreateDatabaseQueryForBackup(); - } - catch (...) - { - /// The database has been dropped recently. - consistent &= !database_infos.contains(database_name); - return; - } - } - - /// Check that information is consistent. - const auto & create = create_database_query->as(); - if (create.getDatabase() != database_name) - { - /// Database was renamed recently. - consistent = false; - return; - } - - if (auto it = database_infos.find(database_name); it != database_infos.end()) - { - const auto & database_info = it->second; - if (database_info.database != database) - { - /// Database was renamed recently. - consistent = false; - return; - } - } - - /// Add information to `database_infos`. - auto & res_database_info = database_infos[database_name]; - res_database_info.database = database; - res_database_info.create_database_query = create_database_query; - - /// Add information about tables too. - for (auto it = database->getTablesIteratorForBackup(*this); it->isValid(); it->next()) - { - if (except_table_names.contains({database_name, it->name()})) - continue; - - collectTableInfo({database_name, it->name()}, /* is_temporary_table= */ false, {}, /* throw_if_not_found= */ false); - if (!consistent) - return; - } -} - -void BackupEntriesCollector::collectAllDatabasesInfo(const std::set & except_database_names, const std::set & except_table_names) -{ - for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases()) - { - if (except_database_names.contains(database_name)) - continue; - collectDatabaseInfo(database_name, except_table_names, false); - if (!consistent) - return; - } -} - -/// Check for consistency of collected information about databases and tables. 
-void BackupEntriesCollector::checkConsistency() -{ - if (!consistent) - return; /// Already inconsistent, no more checks necessary - - /// Databases found while we were scanning tables and while we were scanning databases - must be the same. - for (const auto & [key, table_info] : table_infos) - { - auto it = database_infos.find(key.name.database); - if (it != database_infos.end()) - { - const auto & database_info = it->second; - if (database_info.database != table_info.database) - { - consistent = false; - return; + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} is dropped", tableNameWithTypeToString(table_name.database, table_name.table, true)); } } } +} +/// Check consistency of collected information about databases and tables. +bool BackupEntriesCollector::compareWithPrevious(std::optional & inconsistency_error) +{ /// We need to scan tables at least twice to be sure that we haven't missed any table which could be renamed /// while we were scanning. - std::set database_names; - std::set table_names; - boost::range::copy(database_infos | boost::adaptors::map_keys, std::inserter(database_names, database_names.end())); - boost::range::copy(table_infos | boost::adaptors::map_keys, std::inserter(table_names, table_names.end())); + std::vector> databases_metadata; + std::vector> tables_metadata; + databases_metadata.reserve(database_infos.size()); + tables_metadata.reserve(table_infos.size()); + for (const auto & [database_name, database_info] : database_infos) + databases_metadata.emplace_back(database_name, database_info.create_database_query ? serializeAST(*database_info.create_database_query) : ""); + for (const auto & [table_name, table_info] : table_infos) + tables_metadata.emplace_back(table_name, serializeAST(*table_info.create_table_query)); - if (!previous_database_names || !previous_table_names || (*previous_database_names != database_names) - || (*previous_table_names != table_names)) + /// We need to sort the lists to make the comparison below correct. + ::sort(databases_metadata.begin(), databases_metadata.end()); + ::sort(tables_metadata.begin(), tables_metadata.end()); + + SCOPE_EXIT({ + previous_databases_metadata = std::move(databases_metadata); + previous_tables_metadata = std::move(tables_metadata); + }); + + /// Databases must be the same as during the previous scan. 
+ if (databases_metadata != previous_databases_metadata) { - previous_database_names = std::move(database_names); - previous_table_names = std::move(table_names); - consistent = false; + std::vector> difference; + difference.reserve(databases_metadata.size()); + std::set_difference(databases_metadata.begin(), databases_metadata.end(), previous_databases_metadata.begin(), + previous_databases_metadata.end(), std::back_inserter(difference)); + + if (!difference.empty()) + { + inconsistency_error = Exception{ + ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, + "Database {} were created or changed its definition during scanning", + backQuoteIfNeed(difference[0].first)}; + return false; + } + + difference.clear(); + difference.reserve(previous_databases_metadata.size()); + std::set_difference(previous_databases_metadata.begin(), previous_databases_metadata.end(), databases_metadata.begin(), + databases_metadata.end(), std::back_inserter(difference)); + + if (!difference.empty()) + { + inconsistency_error = Exception{ + ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, + "Database {} were removed or changed its definition during scanning", + backQuoteIfNeed(difference[0].first)}; + return false; + } } + + /// Tables must be the same as during the previous scan. + if (tables_metadata != previous_tables_metadata) + { + std::vector> difference; + difference.reserve(tables_metadata.size()); + std::set_difference(tables_metadata.begin(), tables_metadata.end(), previous_tables_metadata.begin(), + previous_tables_metadata.end(), std::back_inserter(difference)); + + if (!difference.empty()) + { + inconsistency_error = Exception{ + ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, + "{} were created or changed its definition during scanning", + tableNameWithTypeToString(difference[0].first.database, difference[0].first.table, true)}; + return false; + } + + difference.clear(); + difference.reserve(previous_tables_metadata.size()); + std::set_difference(previous_tables_metadata.begin(), previous_tables_metadata.end(), tables_metadata.begin(), + tables_metadata.end(), std::back_inserter(difference)); + + if (!difference.empty()) + { + inconsistency_error = Exception{ + ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, + "{} were removed or changed its definition during scanning", + tableNameWithTypeToString(difference[0].first.database, difference[0].first.table, true)}; + return false; + } + } + + return true; } /// Make backup entries for all the definitions of all the databases found. @@ -444,14 +645,16 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() { for (const auto & [database_name, database_info] : database_infos) { + if (!database_info.create_database_query) + continue; /// We store CREATE DATABASE queries only if there was BACKUP DATABASE specified. 
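The comparison in compareWithPrevious() above boils down to diffing two sorted snapshots of (name, serialized CREATE query) pairs: anything present now but not in the previous scan was created or redefined, and anything present before but missing now was dropped or redefined. A generic sketch of that check; the types and names here are illustrative:

#include <algorithm>
#include <iterator>
#include <optional>
#include <string>
#include <utility>
#include <vector>

using NamedDefinition = std::pair<std::string, std::string>;  // {object name, serialized CREATE query}

// Both snapshots must be sorted. Returns the name of the first object that was created,
// dropped, or changed between the two scans, or std::nullopt if the snapshots match.
std::optional<std::string> firstInconsistency(
    const std::vector<NamedDefinition> & current_snapshot,
    const std::vector<NamedDefinition> & previous_snapshot)
{
    std::vector<NamedDefinition> difference;

    // Present now but not before: created or redefined during scanning.
    std::set_difference(current_snapshot.begin(), current_snapshot.end(),
                        previous_snapshot.begin(), previous_snapshot.end(),
                        std::back_inserter(difference));
    if (!difference.empty())
        return difference.front().first;

    // Present before but not now: dropped or redefined during scanning.
    difference.clear();
    std::set_difference(previous_snapshot.begin(), previous_snapshot.end(),
                        current_snapshot.begin(), current_snapshot.end(),
                        std::back_inserter(difference));
    if (!difference.empty())
        return difference.front().first;

    return std::nullopt;
}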
+ LOG_TRACE(log, "Adding definition of database {}", backQuoteIfNeed(database_name)); ASTPtr new_create_query = database_info.create_database_query; - renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query); - - String new_database_name = renaming_map.getNewDatabaseName(database_name); - auto metadata_path_in_backup = root_path_in_backup / "metadata" / (escapeForFileName(new_database_name) + ".sql"); + adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), nullptr); + renameDatabaseAndTableNameInCreateQuery(new_create_query, renaming_map, context->getGlobalContext()); + const String & metadata_path_in_backup = database_info.metadata_path_in_backup; backup_entries.emplace_back(metadata_path_in_backup, std::make_shared(serializeAST(*new_create_query))); } } @@ -459,26 +662,15 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() /// Calls IDatabase::backupTable() for all the tables found to make backup entries for tables. void BackupEntriesCollector::makeBackupEntriesForTablesDefs() { - for (const auto & [key, table_info] : table_infos) + for (auto & [table_name, table_info] : table_infos) { - LOG_TRACE(log, "Adding definition of {}table {}", (key.is_temporary ? "temporary " : ""), key.name.getFullName()); + LOG_TRACE(log, "Adding definition of {}", tableNameWithTypeToString(table_name.database, table_name.table, false)); ASTPtr new_create_query = table_info.create_table_query; - renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query); - - fs::path metadata_path_in_backup; - if (key.is_temporary) - { - auto new_name = renaming_map.getNewTemporaryTableName(key.name.table); - metadata_path_in_backup = root_path_in_backup / "temporary_tables" / "metadata" / (escapeForFileName(new_name) + ".sql"); - } - else - { - auto new_name = renaming_map.getNewTableName(key.name); - metadata_path_in_backup - = root_path_in_backup / "metadata" / escapeForFileName(new_name.database) / (escapeForFileName(new_name.table) + ".sql"); - } + adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), &table_info.replicated_table_shared_id); + renameDatabaseAndTableNameInCreateQuery(new_create_query, renaming_map, context->getGlobalContext()); + const String & metadata_path_in_backup = table_info.metadata_path_in_backup; backup_entries.emplace_back(metadata_path_in_backup, std::make_shared(serializeAST(*new_create_query))); } } @@ -488,63 +680,76 @@ void BackupEntriesCollector::makeBackupEntriesForTablesData() if (backup_settings.structure_only) return; - for (const auto & [key, table_info] : table_infos) + for (const auto & [table_name, table_info] : table_infos) { - LOG_TRACE(log, "Adding data of {}table {}", (key.is_temporary ? "temporary " : ""), key.name.getFullName()); const auto & storage = table_info.storage; const auto & data_path_in_backup = table_info.data_path_in_backup; - const auto & partitions = table_info.partitions; - storage->backupData(*this, data_path_in_backup, partitions); + if (storage) + { + LOG_TRACE(log, "Adding data of {}", tableNameWithTypeToString(table_name.database, table_name.table, false)); + storage->backupData(*this, data_path_in_backup, table_info.partitions); + } + else + { + /// Storage == null means this storage exists on other replicas but it has not been created on this replica yet. 
+ /// If this table is replicated in this case we call IBackupCoordination::addReplicatedDataPath() which will cause + /// other replicas to fill the storage's data in the backup. + /// If this table is not replicated we'll do nothing leaving the storage's data empty in the backup. + if (table_info.replicated_table_shared_id) + backup_coordination->addReplicatedDataPath(*table_info.replicated_table_shared_id, data_path_in_backup); + } } } void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry) { - if (current_stage == Stage::kWritingBackup) + if (current_status == kWritingBackupStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed"); backup_entries.emplace_back(file_name, backup_entry); } +void BackupEntriesCollector::addBackupEntry(const std::pair & backup_entry) +{ + addBackupEntry(backup_entry.first, backup_entry.second); +} + void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_) { - if (current_stage == Stage::kWritingBackup) + if (current_status == kWritingBackupStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed"); insertAtEnd(backup_entries, backup_entries_); } void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_) { - if (current_stage == Stage::kWritingBackup) + if (current_status == kWritingBackupStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed"); insertAtEnd(backup_entries, std::move(backup_entries_)); } -void BackupEntriesCollector::addPostCollectingTask(std::function task) +void BackupEntriesCollector::addPostTask(std::function task) { - if (current_stage == Stage::kWritingBackup) + if (current_status == kWritingBackupStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding post tasks is not allowed"); - post_collecting_tasks.push(std::move(task)); + post_tasks.push(std::move(task)); } /// Runs all the tasks added with addPostCollectingTask(). -void BackupEntriesCollector::runPostCollectingTasks() +void BackupEntriesCollector::runPostTasks() { /// Post collecting tasks can add other post collecting tasks, our code is fine with that. - while (!post_collecting_tasks.empty()) + while (!post_tasks.empty()) { - auto task = std::move(post_collecting_tasks.front()); - post_collecting_tasks.pop(); + auto task = std::move(post_tasks.front()); + post_tasks.pop(); std::move(task)(); } } -void BackupEntriesCollector::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine) +size_t BackupEntriesCollector::getAccessCounter(AccessEntityType type) { - throw Exception( - ErrorCodes::CANNOT_BACKUP_TABLE, - "Table engine {} doesn't support partitions, cannot backup table {}", - table_engine, - storage_id.getFullTableName()); + access_counters.resize(static_cast(AccessEntityType::MAX)); + return access_counters[static_cast(type)]++; } } diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 1466815f3a7..9a653ee7e4d 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -19,6 +19,7 @@ class IBackupCoordination; class IDatabase; using DatabasePtr = std::shared_ptr; struct StorageID; +enum class AccessEntityType; /// Collects backup entries for all databases and tables which should be put to a backup. 
class BackupEntriesCollector : private boost::noncopyable @@ -27,84 +28,90 @@ public: BackupEntriesCollector(const ASTBackupQuery::Elements & backup_query_elements_, const BackupSettings & backup_settings_, std::shared_ptr backup_coordination_, - const ContextPtr & context_, - std::chrono::seconds timeout_ = std::chrono::seconds(-1) /* no timeout */); + const ContextPtr & context_); ~BackupEntriesCollector(); /// Collects backup entries and returns the result. - /// This function first generates a list of databases and then call IDatabase::backup() for each database from this list. - /// At this moment IDatabase::backup() calls IStorage::backup() and they both call addBackupEntry() to build a list of backup entries. - BackupEntries getBackupEntries(); + /// This function first generates a list of databases and then call IDatabase::getTablesForBackup() for each database from this list. + /// Then it calls IStorage::backupData() to build a list of backup entries. + BackupEntries run(); const BackupSettings & getBackupSettings() const { return backup_settings; } std::shared_ptr getBackupCoordination() const { return backup_coordination; } ContextPtr getContext() const { return context; } - /// Adds a backup entry which will be later returned by getBackupEntries(). - /// These function can be called by implementations of IStorage::backup() in inherited storage classes. + /// Adds a backup entry which will be later returned by run(). + /// These function can be called by implementations of IStorage::backupData() in inherited storage classes. void addBackupEntry(const String & file_name, BackupEntryPtr backup_entry); + void addBackupEntry(const std::pair & backup_entry); void addBackupEntries(const BackupEntries & backup_entries_); void addBackupEntries(BackupEntries && backup_entries_); - /// Adds a function which must be called after all IStorage::backup() have finished their work on all hosts. + /// Adds a function which must be called after all IStorage::backupData() have finished their work on all hosts. /// This function is designed to help making a consistent in some complex cases like /// 1) we need to join (in a backup) the data of replicated tables gathered on different hosts. - void addPostCollectingTask(std::function task); + void addPostTask(std::function task); - /// Writing a backup includes a few stages: - enum class Stage - { - /// Initial stage. - kPreparing, - - /// Finding all tables and databases which we're going to put to the backup. - kFindingTables, - - /// Making temporary hard links and prepare backup entries. - kExtractingDataFromTables, - - /// Running special tasks for replicated databases or tables which can also prepare some backup entries. - kRunningPostTasks, - - /// Writing backup entries to the backup and removing temporary hard links. - kWritingBackup, - - /// An error happens during any of the stages above, the backup won't be written. - kError, - }; - static std::string_view toString(Stage stage); - - /// Throws an exception that a specified table engine doesn't support partitions. - [[noreturn]] static void throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine); + /// Returns an incremental counter used to backup access control. 
+ size_t getAccessCounter(AccessEntityType type); private: - void setStage(Stage new_stage, const String & error_message = {}); void calculateRootPathInBackup(); - void collectDatabasesAndTablesInfo(); - void collectTableInfo(const QualifiedTableName & table_name, bool is_temporary_table, const std::optional & partitions, bool throw_if_not_found); - void collectDatabaseInfo(const String & database_name, const std::set & except_table_names, bool throw_if_not_found); - void collectAllDatabasesInfo(const std::set & except_database_names, const std::set & except_table_names); - void checkConsistency(); + + void gatherMetadataAndCheckConsistency(); + + bool tryGatherMetadataAndCompareWithPrevious(std::optional & inconsistency_error); + + void gatherDatabasesMetadata(); + + void gatherDatabaseMetadata( + const String & database_name, + bool throw_if_database_not_found, + bool backup_create_database_query, + const std::optional & table_name, + bool throw_if_table_not_found, + const std::optional & partitions, + bool all_tables, + const std::set & except_table_names); + + void gatherTablesMetadata(); + void lockTablesForReading(); + bool compareWithPrevious(std::optional & inconsistency_error); + void makeBackupEntriesForDatabasesDefs(); void makeBackupEntriesForTablesDefs(); void makeBackupEntriesForTablesData(); - void runPostCollectingTasks(); + void runPostTasks(); + + Strings setStatus(const String & new_status, const String & message = ""); const ASTBackupQuery::Elements backup_query_elements; const BackupSettings backup_settings; std::shared_ptr backup_coordination; ContextPtr context; - std::chrono::seconds timeout; + std::chrono::milliseconds consistent_metadata_snapshot_timeout; Poco::Logger * log; - Stage current_stage = Stage::kPreparing; - std::filesystem::path root_path_in_backup; + Strings all_hosts; DDLRenamingMap renaming_map; + std::filesystem::path root_path_in_backup; struct DatabaseInfo { DatabasePtr database; ASTPtr create_database_query; + String metadata_path_in_backup; + + struct TableParams + { + bool throw_if_table_not_found = false; + std::optional partitions; + }; + + std::unordered_map tables; + + bool all_tables = false; + std::unordered_set except_table_names; }; struct TableInfo @@ -113,26 +120,22 @@ private: StoragePtr storage; TableLockHolder table_lock; ASTPtr create_table_query; + String metadata_path_in_backup; std::filesystem::path data_path_in_backup; + std::optional replicated_table_shared_id; std::optional partitions; }; - struct TableKey - { - QualifiedTableName name; - bool is_temporary = false; - bool operator ==(const TableKey & right) const; - bool operator <(const TableKey & right) const; - }; - + String current_status; + std::chrono::steady_clock::time_point consistent_metadata_snapshot_start_time; std::unordered_map database_infos; - std::map table_infos; - std::optional> previous_database_names; - std::optional> previous_table_names; - bool consistent = false; + std::unordered_map table_infos; + std::vector> previous_databases_metadata; + std::vector> previous_tables_metadata; BackupEntries backup_entries; - std::queue> post_collecting_tasks; + std::queue> post_tasks; + std::vector access_counters; }; } diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index c5de4bd7e67..9ff91050177 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -39,7 +39,7 @@ DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & e const String & new_table_name = element.new_table_name; 
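The change just below drops the separate setNewTemporaryTableName() call and routes temporary tables through the ordinary table renaming, keyed by the reserved temporary database. A hypothetical map with the same shape (the database name constant is a stand-in for DatabaseCatalog::TEMPORARY_DATABASE):

#include <map>
#include <string>
#include <utility>

using QualifiedName = std::pair<std::string, std::string>;  // {database, table}

// Stand-in for DatabaseCatalog::TEMPORARY_DATABASE.
static const std::string kTemporaryDatabase = "_temporary_and_external_tables";

struct RenamingMapSketch
{
    std::map<QualifiedName, QualifiedName> table_renames;

    void setNewTableName(const QualifiedName & old_name, const QualifiedName & new_name)
    {
        table_renames[old_name] = new_name;
    }

    // Temporary tables go through the same map, under the reserved database name.
    void setNewTemporaryTableName(const std::string & old_table, const std::string & new_table)
    {
        setNewTableName({kTemporaryDatabase, old_table}, {kTemporaryDatabase, new_table});
    }
};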
assert(!table_name.empty()); assert(!new_table_name.empty()); - map.setNewTemporaryTableName(table_name, new_table_name); + map.setNewTableName({DatabaseCatalog::TEMPORARY_DATABASE, table_name}, {DatabaseCatalog::TEMPORARY_DATABASE, new_table_name}); break; } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 84dc63b4f9f..635b2810941 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -166,9 +166,8 @@ UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & c BackupEntries backup_entries; { - auto timeout = std::chrono::seconds{context_in_use->getConfigRef().getInt("backups.backup_prepare_timeout", -1)}; - BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context_in_use, timeout}; - backup_entries = backup_entries_collector.getBackupEntries(); + BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context_in_use}; + backup_entries = backup_entries_collector.run(); } writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool); @@ -272,8 +271,8 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte String addr_database = address->default_database.empty() ? current_database : address->default_database; for (auto & element : restore_elements) element.setCurrentDatabase(addr_database); - RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context_in_use, {}}; - dummy_restorer.checkAccessOnly(); + RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context_in_use}; + dummy_restorer.run(RestorerFromBackup::CHECK_ACCESS_ONLY); } } @@ -325,11 +324,9 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte DataRestoreTasks data_restore_tasks; { - auto timeout = std::chrono::seconds{context_in_use->getConfigRef().getInt("backups.restore_metadata_timeout", -1)}; RestorerFromBackup restorer{restore_query->elements, restore_settings, restore_coordination, - backup, context_in_use, timeout}; - restorer.restoreMetadata(); - data_restore_tasks = restorer.getDataRestoreTasks(); + backup, context_in_use}; + data_restore_tasks = restorer.run(RestorerFromBackup::RESTORE); } restoreTablesData(std::move(data_restore_tasks), restores_thread_pool); diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp new file mode 100644 index 00000000000..8223e08f127 --- /dev/null +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -0,0 +1,113 @@ +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace +{ + void visitStorageSystemTableEngine(ASTStorage &, const DDLAdjustingForBackupVisitor::Data & data) + { + /// Precondition: storage.engine && storage.engine->name.starts_with("System")) + + /// If this is a definition of a system table we'll remove columns and comment because they're redundant for backups. 
+ auto & create = data.create_query->as(); + create.reset(create.columns_list); + create.reset(create.comment); + } + + void visitStorageReplicatedTableEngine(ASTStorage & storage, const DDLAdjustingForBackupVisitor::Data & data) + { + /// Precondition: engine_name.starts_with("Replicated") && engine_name.ends_with("MergeTree") + + if (data.replicated_table_shared_id) + *data.replicated_table_shared_id = StorageReplicatedMergeTree::tryGetTableSharedIDFromCreateQuery(*data.create_query, data.global_context); + + /// Before storing the metadata in a backup we have to find a zookeeper path in its definition and turn the table's UUID in there + /// back into "{uuid}", and also we probably can remove the zookeeper path and replica name if they're default. + /// So we're kind of reverting what we had done to the table's definition in registerStorageMergeTree.cpp before we created this table. + auto & create = data.create_query->as(); + auto & engine = *storage.engine; + + auto * engine_args_ast = typeid_cast(engine.arguments.get()); + if (!engine_args_ast) + return; + + auto & engine_args = engine_args_ast->children; + if (engine_args.size() < 2) + return; + + auto * zookeeper_path_ast = typeid_cast(engine_args[0].get()); + auto * replica_name_ast = typeid_cast(engine_args[1].get()); + if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) && + replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String)) + { + String & zookeeper_path_arg = zookeeper_path_ast->value.get(); + String & replica_name_arg = replica_name_ast->value.get(); + if (create.uuid != UUIDHelpers::Nil) + { + String table_uuid_str = toString(create.uuid); + if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != String::npos) + zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}"); + } + const auto & config = data.global_context->getConfigRef(); + if ((zookeeper_path_arg == StorageReplicatedMergeTree::getDefaultZooKeeperPath(config)) + && (replica_name_arg == StorageReplicatedMergeTree::getDefaultReplicaName(config)) + && ((engine_args.size() == 2) || !engine_args[2]->as())) + { + engine_args.erase(engine_args.begin(), engine_args.begin() + 2); + } + } + } + + void visitStorage(ASTStorage & storage, const DDLAdjustingForBackupVisitor::Data & data) + { + if (!storage.engine) + return; + + const String & engine_name = storage.engine->name; + if (engine_name.starts_with("System")) + visitStorageSystemTableEngine(storage, data); + else if (engine_name.starts_with("Replicated") && engine_name.ends_with("MergeTree")) + visitStorageReplicatedTableEngine(storage, data); + } + + void visitCreateQuery(ASTCreateQuery & create, const DDLAdjustingForBackupVisitor::Data & data) + { + create.uuid = UUIDHelpers::Nil; + create.to_inner_uuid = UUIDHelpers::Nil; + + if (create.storage) + visitStorage(*create.storage, data); + } +} + + +bool DDLAdjustingForBackupVisitor::needChildVisit(const ASTPtr &, const ASTPtr &) +{ + return false; +} + +void DDLAdjustingForBackupVisitor::visit(ASTPtr ast, const Data & data) +{ + if (auto * create = ast->as()) + visitCreateQuery(*create, data); +} + +void adjustCreateQueryForBackup(ASTPtr ast, const ContextPtr & global_context, std::optional * replicated_table_shared_id) +{ + if (replicated_table_shared_id) + *replicated_table_shared_id = {}; + + DDLAdjustingForBackupVisitor::Data data{ast, global_context, replicated_table_shared_id}; + DDLAdjustingForBackupVisitor::Visitor{data}.visit(ast); +} + +} diff --git 
a/src/Backups/DDLAdjustingForBackupVisitor.h b/src/Backups/DDLAdjustingForBackupVisitor.h new file mode 100644 index 00000000000..63353dcc000 --- /dev/null +++ b/src/Backups/DDLAdjustingForBackupVisitor.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class IAST; +using ASTPtr = std::shared_ptr; +class Context; +using ContextPtr = std::shared_ptr; + +/// Changes a create query to a form which is appropriate or suitable for saving in a backup. +/// Also extracts a replicated table's shared ID from the create query if this is a create query for a replicated table. +/// `replicated_table_shared_id` can be null if you don't need that. +void adjustCreateQueryForBackup(ASTPtr ast, const ContextPtr & global_context, std::optional * replicated_table_shared_id); + +/// Visits ASTCreateQuery and changes it to a form which is appropriate or suitable for saving in a backup. +class DDLAdjustingForBackupVisitor +{ +public: + struct Data + { + ASTPtr create_query; + ContextPtr global_context; + std::optional * replicated_table_shared_id = nullptr; + }; + + using Visitor = InDepthNodeVisitor; + + static bool needChildVisit(const ASTPtr & ast, const ASTPtr & child); + static void visit(ASTPtr ast, const Data & data); +}; + +} diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index 92b7139ed5f..b4c5c7b3d88 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -13,11 +13,10 @@ class IBackupCoordination public: virtual ~IBackupCoordination() = default; - /// Sets the current stage and waits for other hosts to come to this stage too. - virtual void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) = 0; - - /// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage(). - virtual void syncStageError(const String & current_host, const String & error_message) = 0; + /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. + virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0; + virtual Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts) = 0; + virtual Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts, UInt64 timeout_ms) = 0; struct PartNameAndChecksum { @@ -29,21 +28,29 @@ public: /// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function /// getReplicatedPartNames(). /// Checksums are used only to control that parts under the same names on different replicas are the same. - virtual void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, + virtual void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) = 0; /// Returns the names of the parts which a specified replica of a replicated table should put to the backup. /// This is the same list as it was added by call of the function addReplicatedPartNames() but without duplications and without /// parts covered by another parts. 
- virtual Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const = 0; + virtual Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const = 0; /// Adds a data path in backup for a replicated table. /// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function /// getReplicatedDataPaths(). - virtual void addReplicatedDataPath(const String & table_zk_path, const String & data_path) = 0; + virtual void addReplicatedDataPath(const String & table_shared_id, const String & data_path) = 0; /// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()). - virtual Strings getReplicatedDataPaths(const String & table_zk_path) const = 0; + virtual Strings getReplicatedDataPaths(const String & table_shared_id) const = 0; + + /// Adds a path to access.txt file keeping access entities of a ReplicatedAccessStorage. + virtual void addReplicatedAccessPath(const String & access_zk_path, const String & file_path) = 0; + virtual Strings getReplicatedAccessPaths(const String & access_zk_path) const = 0; + + /// Sets the host id of a host storing access entities of a ReplicatedAccessStorage to backup. + virtual void setReplicatedAccessHost(const String & access_zk_path, const String & host) = 0; + virtual String getReplicatedAccessHost(const String & access_zk_path) const = 0; struct FileInfo { diff --git a/src/Backups/IRestoreCoordination.h b/src/Backups/IRestoreCoordination.h index fd9a67e1b96..ba76a6e0c99 100644 --- a/src/Backups/IRestoreCoordination.h +++ b/src/Backups/IRestoreCoordination.h @@ -13,11 +13,10 @@ class IRestoreCoordination public: virtual ~IRestoreCoordination() = default; - /// Sets the current stage and waits for other hosts to come to this stage too. - virtual void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) = 0; - - /// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage(). - virtual void syncStageError(const String & current_host, const String & error_message) = 0; + /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. + virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0; + virtual Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts) = 0; + virtual Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts, UInt64 timeout_ms) = 0; /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table. 
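The setStatus/setStatusAndWait/setStatusAndWaitFor family replaces the old numeric stages with string statuses, where a status starting with "error:" releases every waiter. A single-process sketch of that waiting rule using a mutex and condition variable (illustrative only; the real coordination goes through ZooKeeper nodes):

#include <condition_variable>
#include <map>
#include <mutex>
#include <string>
#include <vector>

// Illustrative only: hosts publish a status; waiting for other hosts succeeds when they
// all reached the status, and aborts early as soon as any host reports "error:...".
class StatusSync
{
public:
    void set(const std::string & host, const std::string & status)
    {
        std::lock_guard lock{mutex};
        statuses[host] = status;
        changed.notify_all();
    }

    void setAndWait(const std::string & host, const std::string & status, const std::vector<std::string> & other_hosts)
    {
        std::unique_lock lock{mutex};
        statuses[host] = status;
        changed.notify_all();
        changed.wait(lock, [&]
        {
            for (const auto & other : other_hosts)
            {
                auto it = statuses.find(other);
                if (it != statuses.end() && it->second.starts_with("error:"))
                    return true;                    // stop waiting on error
                if (it == statuses.end() || it->second != status)
                    return false;                   // keep waiting for this host
            }
            return true;                            // everybody reached the status
        });
    }

private:
    std::mutex mutex;
    std::condition_variable changed;
    std::map<std::string, std::string> statuses;
};
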
virtual bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) = 0; diff --git a/src/Backups/RestoreCoordinationDistributed.cpp b/src/Backups/RestoreCoordinationDistributed.cpp index e131ce7fe24..8cbaa01810d 100644 --- a/src/Backups/RestoreCoordinationDistributed.cpp +++ b/src/Backups/RestoreCoordinationDistributed.cpp @@ -9,7 +9,7 @@ namespace DB RestoreCoordinationDistributed::RestoreCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_) : zookeeper_path(zookeeper_path_) , get_zookeeper(get_zookeeper_) - , stage_sync(zookeeper_path_ + "/stage", get_zookeeper_, &Poco::Logger::get("RestoreCoordination")) + , status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("RestoreCoordination")) { createRootNodes(); } @@ -26,14 +26,19 @@ void RestoreCoordinationDistributed::createRootNodes() zookeeper->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", ""); } -void RestoreCoordinationDistributed::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) +void RestoreCoordinationDistributed::setStatus(const String & current_host, const String & new_status, const String & message) { - stage_sync.syncStage(current_host, new_stage, wait_hosts, timeout); + status_sync.set(current_host, new_status, message); } -void RestoreCoordinationDistributed::syncStageError(const String & current_host, const String & error_message) +Strings RestoreCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) { - stage_sync.syncStageError(current_host, error_message); + return status_sync.setAndWait(current_host, new_status, message, all_hosts); +} + +Strings RestoreCoordinationDistributed::setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) +{ + return status_sync.setAndWaitFor(current_host, new_status, message, all_hosts, timeout_ms); } bool RestoreCoordinationDistributed::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) diff --git a/src/Backups/RestoreCoordinationDistributed.h b/src/Backups/RestoreCoordinationDistributed.h index 0ea5db3f062..52b961cf0ef 100644 --- a/src/Backups/RestoreCoordinationDistributed.h +++ b/src/Backups/RestoreCoordinationDistributed.h @@ -14,11 +14,10 @@ public: RestoreCoordinationDistributed(const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper); ~RestoreCoordinationDistributed() override; - /// Sets the current stage and waits for other hosts to come to this stage too. - void syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) override; - - /// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage(). - void syncStageError(const String & current_host, const String & error_message) override; + /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. 
+ void setStatus(const String & current_host, const String & new_status, const String & message) override; + Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override; + Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table. bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override; @@ -42,7 +41,7 @@ private: const String zookeeper_path; const zkutil::GetZooKeeper get_zookeeper; - BackupCoordinationStageSync stage_sync; + BackupCoordinationStatusSync status_sync; }; } diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp index 9cecc3f90c9..a999cc13195 100644 --- a/src/Backups/RestoreCoordinationLocal.cpp +++ b/src/Backups/RestoreCoordinationLocal.cpp @@ -7,12 +7,18 @@ namespace DB RestoreCoordinationLocal::RestoreCoordinationLocal() = default; RestoreCoordinationLocal::~RestoreCoordinationLocal() = default; -void RestoreCoordinationLocal::syncStage(const String &, int, const Strings &, std::chrono::seconds) +void RestoreCoordinationLocal::setStatus(const String &, const String &, const String &) { } -void RestoreCoordinationLocal::syncStageError(const String &, const String &) +Strings RestoreCoordinationLocal::setStatusAndWait(const String &, const String &, const String &, const Strings &) { + return {}; +} + +Strings RestoreCoordinationLocal::setStatusAndWaitFor(const String &, const String &, const String &, const Strings &, UInt64) +{ + return {}; } bool RestoreCoordinationLocal::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h index b73f345df47..68624481a7a 100644 --- a/src/Backups/RestoreCoordinationLocal.h +++ b/src/Backups/RestoreCoordinationLocal.h @@ -17,11 +17,10 @@ public: RestoreCoordinationLocal(); ~RestoreCoordinationLocal() override; - /// Sets the current stage and waits for other hosts to come to this stage too. - void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) override; - - /// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage(). - void syncStageError(const String & current_host, const String & error_message) override; + /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. + void setStatus(const String & current_host, const String & new_status, const String & message) override; + Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override; + Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table. 
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override; diff --git a/src/Backups/RestoreSettings.cpp b/src/Backups/RestoreSettings.cpp index 590d39f24f8..efa1fe2cfb8 100644 --- a/src/Backups/RestoreSettings.cpp +++ b/src/Backups/RestoreSettings.cpp @@ -74,7 +74,7 @@ namespace { case RestoreTableCreationMode::kCreate: return Field{true}; case RestoreTableCreationMode::kMustExist: return Field{false}; - case RestoreTableCreationMode::kCreateIfNotExists: return Field{"if not exists"}; + case RestoreTableCreationMode::kCreateIfNotExists: return Field{"if-not-exists"}; } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected value of enum RestoreTableCreationMode: {}", static_cast(value)); } @@ -131,12 +131,14 @@ namespace switch (value) { case RestoreAccessCreationMode::kCreate: return Field{true}; - case RestoreAccessCreationMode::kCreateIfNotExists: return Field{"if not exists"}; + case RestoreAccessCreationMode::kCreateIfNotExists: return Field{"if-not-exists"}; case RestoreAccessCreationMode::kReplace: return Field{"replace"}; } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected value of enum RestoreAccessCreationMode: {}", static_cast(value)); } }; + + using SettingFieldRestoreUDFCreationMode = SettingFieldRestoreAccessCreationMode; } /// List of restore settings except base_backup_name and cluster_host_ids. @@ -155,6 +157,7 @@ namespace M(Bool, allow_non_empty_tables) \ M(RestoreAccessCreationMode, create_access) \ M(Bool, allow_unresolved_access_dependencies) \ + M(RestoreUDFCreationMode, create_function) \ M(Bool, internal) \ M(String, host_id) \ M(String, coordination_zk_path) diff --git a/src/Backups/RestoreSettings.h b/src/Backups/RestoreSettings.h index 5e941b79508..1bc5d867a37 100644 --- a/src/Backups/RestoreSettings.h +++ b/src/Backups/RestoreSettings.h @@ -36,6 +36,8 @@ enum class RestoreAccessCreationMode kReplace, }; +using RestoreUDFCreationMode = RestoreAccessCreationMode; + /// Settings specified in the "SETTINGS" clause of a RESTORE query. struct RestoreSettings { @@ -99,6 +101,9 @@ struct RestoreSettings /// For example, if an user has a profile assigned and that profile is not in the backup and doesn't exist locally. bool allow_unresolved_access_dependencies = false; + /// How the RESTORE command will handle if a user-defined function which it's going to restore already exists. + RestoreUDFCreationMode create_function = RestoreUDFCreationMode::kCreateIfNotExists; + /// Internal, should not be specified by user. bool internal = false; diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 14f5b7f48f0..5b211bc50a8 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -1,9 +1,11 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -39,54 +41,58 @@ namespace ErrorCodes namespace { - constexpr const std::string_view sql_ext = ".sql"; + /// Finding databases and tables in the backup which we're going to restore. + constexpr const char * kFindingTablesInBackupStatus = "finding tables in backup"; - String tryGetTableEngine(const IAST & ast) + /// Creating databases or finding them and checking their definitions. + constexpr const char * kCreatingDatabasesStatus = "creating databases"; + + /// Creating tables or finding them and checking their definition. + constexpr const char * kCreatingTablesStatus = "creating tables"; + + /// Inserting restored data to tables. 
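The creation-mode settings above are serialized as a boolean-or-string value: true/false for create/must-exist, plus "if-not-exists" and "replace". A hedged sketch of that round trip in isolation, using an illustrative enum rather than the real SettingField classes:

#include <stdexcept>
#include <string>

// Illustrative only: how a creation mode could be rendered into the value accepted
// by the SETTINGS clause and parsed back again.
enum class CreationMode { kCreate, kMustExist, kCreateIfNotExists, kReplace };

std::string toSettingValue(CreationMode mode)
{
    switch (mode)
    {
        case CreationMode::kCreate:            return "true";
        case CreationMode::kMustExist:         return "false";
        case CreationMode::kCreateIfNotExists: return "if-not-exists";
        case CreationMode::kReplace:           return "replace";
    }
    throw std::logic_error("unexpected CreationMode");
}

CreationMode fromSettingValue(const std::string & value)
{
    if (value == "true")          return CreationMode::kCreate;
    if (value == "false")         return CreationMode::kMustExist;
    if (value == "if-not-exists") return CreationMode::kCreateIfNotExists;
    if (value == "replace")       return CreationMode::kReplace;
    throw std::invalid_argument("unexpected creation mode: " + value);
}
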
+ constexpr const char * kInsertingDataToTablesStatus = "inserting data to tables"; + + /// Error status. + constexpr const char * kErrorStatus = BackupCoordinationStatusSync::kErrorStatus; + + /// Uppercases the first character of a passed string. + String toUpperFirst(const String & str) { - const ASTCreateQuery * create = ast.as(); - if (!create) - return {}; - if (!create->storage || !create->storage->engine) - return {}; - return create->storage->engine->name; + String res = str; + res[0] = std::toupper(res[0]); + return res; } - bool hasSystemTableEngine(const IAST & ast) + /// Outputs "table " or "temporary table " + String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper) { - return tryGetTableEngine(ast).starts_with("System"); + String str; + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + str = fmt::format("temporary table {}", backQuoteIfNeed(table_name)); + else + str = fmt::format("table {}.{}", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); + if (first_upper) + str[0] = std::toupper(str[0]); + return str; } - bool hasSystemAccessTableEngine(const IAST & ast) + /// Whether a specified name corresponds one of the tables backuping ACL. + bool isSystemAccessTableName(const QualifiedTableName & table_name) { - String engine_name = tryGetTableEngine(ast); - return (engine_name == "SystemUsers") || (engine_name == "SystemRoles") || (engine_name == "SystemSettingsProfiles") - || (engine_name == "SystemRowPolicies") || (engine_name == "SystemQuotas"); + if (table_name.database != DatabaseCatalog::SYSTEM_DATABASE) + return false; + + return (table_name.table == "users") || (table_name.table == "roles") || (table_name.table == "settings_profiles") + || (table_name.table == "row_policies") || (table_name.table == "quotas"); } -} -bool RestorerFromBackup::TableKey::operator ==(const TableKey & right) const -{ - return (name == right.name) && (is_temporary == right.is_temporary); -} - -bool RestorerFromBackup::TableKey::operator <(const TableKey & right) const -{ - return (name < right.name) || ((name == right.name) && (is_temporary < right.is_temporary)); -} - -std::string_view RestorerFromBackup::toString(Stage stage) -{ - switch (stage) + /// Whether a specified name corresponds one of the tables backuping ACL. 
+ bool isSystemFunctionsTableName(const QualifiedTableName & table_name) { - case Stage::kPreparing: return "Preparing"; - case Stage::kFindingTablesInBackup: return "Finding tables in backup"; - case Stage::kCreatingDatabases: return "Creating databases"; - case Stage::kCreatingTables: return "Creating tables"; - case Stage::kInsertingDataToTables: return "Inserting data to tables"; - case Stage::kError: return "Error"; + return (table_name.database == DatabaseCatalog::SYSTEM_DATABASE) && (table_name.table == "functions"); } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown restore stage: {}", static_cast(stage)); -} + } RestorerFromBackup::RestorerFromBackup( @@ -94,71 +100,66 @@ RestorerFromBackup::RestorerFromBackup( const RestoreSettings & restore_settings_, std::shared_ptr restore_coordination_, const BackupPtr & backup_, - const ContextMutablePtr & context_, - std::chrono::seconds timeout_) + const ContextMutablePtr & context_) : restore_query_elements(restore_query_elements_) , restore_settings(restore_settings_) , restore_coordination(restore_coordination_) , backup(backup_) , context(context_) - , timeout(timeout_) + , create_table_timeout(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000)) , log(&Poco::Logger::get("RestorerFromBackup")) { } RestorerFromBackup::~RestorerFromBackup() = default; -void RestorerFromBackup::restoreMetadata() -{ - run(/* only_check_access= */ false); -} - -void RestorerFromBackup::checkAccessOnly() -{ - run(/* only_check_access= */ true); -} - -void RestorerFromBackup::run(bool only_check_access) +RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode) { try { - /// restoreMetadata() must not be called multiple times. - if (current_stage != Stage::kPreparing) + /// run() can be called onle once. + if (!current_status.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Already restoring"); - /// Calculate the root path in the backup for restoring, it's either empty or has the format "shards//replicas//". - findRootPathsInBackup(); + /// Find other hosts working along with us to execute this ON CLUSTER query. + all_hosts = BackupSettings::Util::filterHostIDs( + restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num); /// Do renaming in the create queries according to the renaming config. renaming_map = makeRenamingMapFromBackupQuery(restore_query_elements); + /// Calculate the root path in the backup for restoring, it's either empty or has the format "shards//replicas//". + findRootPathsInBackup(); + /// Find all the databases and tables which we will read from the backup. - setStage(Stage::kFindingTablesInBackup); - collectDatabaseAndTableInfos(); + setStatus(kFindingTablesInBackupStatus); + findDatabasesAndTablesInBackup(); /// Check access rights. - checkAccessForCollectedInfos(); - if (only_check_access) - return; + checkAccessForObjectsFoundInBackup(); + + if (mode == Mode::CHECK_ACCESS_ONLY) + return {}; /// Create databases using the create queries read from the backup. - setStage(Stage::kCreatingDatabases); + setStatus(kCreatingDatabasesStatus); createDatabases(); /// Create tables using the create queries read from the backup. - setStage(Stage::kCreatingTables); + setStatus(kCreatingTablesStatus); createTables(); /// All what's left is to insert data to tables. /// No more data restoring tasks are allowed after this point. - setStage(Stage::kInsertingDataToTables); + setStatus(kInsertingDataToTablesStatus); + return getDataRestoreTasks(); } catch (...) 
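run() now drives the whole restore through named statuses and returns the data-restoring tasks itself instead of exposing them through a separate call. A compressed sketch of that control flow, with the ClickHouse-specific work stubbed out (all member names are illustrative):

#include <functional>
#include <string>
#include <vector>

// Illustrative only: the overall shape of RestorerFromBackup::run().
struct RestoreFlowSketch
{
    enum class Mode { RESTORE, CHECK_ACCESS_ONLY };
    using Task = std::function<void()>;

    std::vector<Task> run(Mode mode)
    {
        setStatus("finding tables in backup");
        findDatabasesAndTables();
        checkAccessRights();
        if (mode == Mode::CHECK_ACCESS_ONLY)
            return {};

        setStatus("creating databases");
        createDatabases();

        setStatus("creating tables");
        createTables();

        setStatus("inserting data to tables");   // after this point no new tasks may be added
        return std::move(data_restore_tasks);
    }

    // Stubs standing in for the real work.
    void setStatus(const std::string &) {}
    void findDatabasesAndTables() {}
    void checkAccessRights() {}
    void createDatabases() {}
    void createTables() {}

    std::vector<Task> data_restore_tasks;
};
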
{ try { /// Other hosts should know that we've encountered an error. - setStage(Stage::kError, getCurrentExceptionMessage(false)); + setStatus(kErrorStatus, getCurrentExceptionMessage(false)); } catch (...) { @@ -167,59 +168,20 @@ void RestorerFromBackup::run(bool only_check_access) } } - -RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks() +void RestorerFromBackup::setStatus(const String & new_status, const String & message) { - if (current_stage != Stage::kInsertingDataToTables) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Metadata wasn't restored"); - - if (data_restore_tasks.empty() && !access_restore_task) - return {}; - - LOG_TRACE(log, "Will insert data to tables"); - - /// Storages and table locks must exist while we're executing data restoring tasks. - auto storages = std::make_shared>(); - auto table_locks = std::make_shared>(); - storages->reserve(table_infos.size()); - table_locks->reserve(table_infos.size()); - for (const auto & table_info : table_infos | boost::adaptors::map_values) + if (new_status == kErrorStatus) { - storages->push_back(table_info.storage); - table_locks->push_back(table_info.table_lock); - } - - DataRestoreTasks res_tasks; - for (const auto & task : data_restore_tasks) - res_tasks.push_back([task, storages, table_locks] { task(); }); - - if (access_restore_task) - res_tasks.push_back([task = access_restore_task, access_control = &context->getAccessControl()] { task->restore(*access_control); }); - - return res_tasks; -} - -void RestorerFromBackup::setStage(Stage new_stage, const String & error_message) -{ - if (new_stage == Stage::kError) - LOG_ERROR(log, "{} failed with error: {}", toString(current_stage), error_message); - else - LOG_TRACE(log, "{}", toString(new_stage)); - - current_stage = new_stage; - - if (!restore_coordination) - return; - - if (new_stage == Stage::kError) - { - restore_coordination->syncStageError(restore_settings.host_id, error_message); + LOG_ERROR(log, "{} failed with {}", toUpperFirst(current_status), message); + if (restore_coordination) + restore_coordination->setStatus(restore_settings.host_id, new_status, message); } else { - auto all_hosts - = BackupSettings::Util::filterHostIDs(restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num); - restore_coordination->syncStage(restore_settings.host_id, static_cast(new_stage), all_hosts, timeout); + LOG_TRACE(log, "{}", toUpperFirst(new_status)); + current_status = new_status; + if (restore_coordination) + restore_coordination->setStatusAndWait(restore_settings.host_id, new_status, message, all_hosts); } } @@ -302,7 +264,7 @@ void RestorerFromBackup::findRootPathsInBackup() ", ")); } -void RestorerFromBackup::collectDatabaseAndTableInfos() +void RestorerFromBackup::findDatabasesAndTablesInBackup() { database_infos.clear(); table_infos.clear(); @@ -312,22 +274,22 @@ void RestorerFromBackup::collectDatabaseAndTableInfos() { case ASTBackupQuery::ElementType::TABLE: { - collectTableInfo({element.database_name, element.table_name}, false, element.partitions); + findTableInBackup({element.database_name, element.table_name}, element.partitions); break; } case ASTBackupQuery::ElementType::TEMPORARY_TABLE: { - collectTableInfo({element.database_name, element.table_name}, true, element.partitions); + findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, element.partitions); break; } case ASTBackupQuery::ElementType::DATABASE: { - collectDatabaseInfo(element.database_name, element.except_tables, /* 
throw_if_no_database_metadata_in_backup= */ true); + findDatabaseInBackup(element.database_name, element.except_tables); break; } case ASTBackupQuery::ElementType::ALL: { - collectAllDatabasesInfo(element.except_databases, element.except_tables); + findEverythingInBackup(element.except_databases, element.except_tables); break; } } @@ -336,9 +298,9 @@ void RestorerFromBackup::collectDatabaseAndTableInfos() LOG_INFO(log, "Will restore {} databases and {} tables", database_infos.size(), table_infos.size()); } -void RestorerFromBackup::collectTableInfo(const QualifiedTableName & table_name_in_backup, bool is_temporary_table, const std::optional & partitions) +void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions) { - String database_name_in_backup = is_temporary_table ? DatabaseCatalog::TEMPORARY_DATABASE : table_name_in_backup.database; + bool is_temporary_table = (table_name_in_backup.database == DatabaseCatalog::TEMPORARY_DATABASE); std::optional metadata_path; std::optional root_path_in_use; @@ -365,21 +327,20 @@ void RestorerFromBackup::collectTableInfo(const QualifiedTableName & table_name_ } if (!metadata_path) - throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Table {} not found in backup", table_name_in_backup.getFullName()); + throw Exception( + ErrorCodes::BACKUP_ENTRY_NOT_FOUND, + "{} not found in backup", + tableNameWithTypeToString(table_name_in_backup.database, table_name_in_backup.table, true)); - TableKey table_key; fs::path data_path_in_backup; if (is_temporary_table) { data_path_in_backup = *root_path_in_use / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup.table); - table_key.name.table = renaming_map.getNewTemporaryTableName(table_name_in_backup.table); - table_key.is_temporary = true; } else { data_path_in_backup = *root_path_in_use / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); - table_key.name = renaming_map.getNewTableName(table_name_in_backup); } auto read_buffer = backup->readFile(*metadata_path)->getReadBuffer(); @@ -388,27 +349,30 @@ void RestorerFromBackup::collectTableInfo(const QualifiedTableName & table_name_ read_buffer.reset(); ParserCreateQuery create_parser; ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, create_table_query); + renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext()); - if (auto it = table_infos.find(table_key); it != table_infos.end()) + QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); + + if (auto it = table_infos.find(table_name); it != table_infos.end()) { const TableInfo & table_info = it->second; if (table_info.create_table_query && (serializeAST(*table_info.create_table_query) != serializeAST(*create_table_query))) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, - "Extracted two different create queries for the same {}table {}: {} and {}", - (is_temporary_table ? 
"temporary " : ""), - table_key.name.getFullName(), + "Extracted two different create queries for the same {}: {} and {}", + tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*table_info.create_table_query), serializeAST(*create_table_query)); } } - TableInfo & res_table_info = table_infos[table_key]; + TableInfo & res_table_info = table_infos[table_name]; res_table_info.create_table_query = create_table_query; + res_table_info.is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); + res_table_info.dependencies = getDependenciesSetFromCreateQuery(context->getGlobalContext(), table_name, create_table_query); + res_table_info.has_data = backup->hasFiles(data_path_in_backup); res_table_info.data_path_in_backup = data_path_in_backup; - res_table_info.dependencies = getDependenciesSetFromCreateQuery(context->getGlobalContext(), table_key.name, create_table_query); if (partitions) { @@ -417,35 +381,45 @@ void RestorerFromBackup::collectTableInfo(const QualifiedTableName & table_name_ insertAtEnd(*res_table_info.partitions, *partitions); } - if (hasSystemAccessTableEngine(*create_table_query)) + if (!restore_settings.structure_only && isSystemAccessTableName(table_name)) { - if (!access_restore_task) - access_restore_task = std::make_shared(backup, restore_settings, restore_coordination); - access_restore_task->addDataPath(data_path_in_backup); + if (!access_restorer) + access_restorer = std::make_unique(backup, restore_settings); + access_restorer->addDataPath(data_path_in_backup, table_name); } } -void RestorerFromBackup::collectDatabaseInfo(const String & database_name_in_backup, const std::set & except_table_names, bool throw_if_no_database_metadata_in_backup) +void RestorerFromBackup::findDatabaseInBackup(const String & database_name_in_backup, const std::set & except_table_names) { std::optional metadata_path; std::unordered_set table_names_in_backup; for (const auto & root_path_in_backup : root_paths_in_backup) { - fs::path try_metadata_path = root_path_in_backup / "metadata" / (escapeForFileName(database_name_in_backup) + ".sql"); - if (!metadata_path && backup->fileExists(try_metadata_path)) + fs::path try_metadata_path, try_tables_metadata_path; + if (database_name_in_backup == DatabaseCatalog::TEMPORARY_DATABASE) + { + try_tables_metadata_path = root_path_in_backup / "temporary_tables" / "metadata"; + } + else + { + try_metadata_path = root_path_in_backup / "metadata" / (escapeForFileName(database_name_in_backup) + ".sql"); + try_tables_metadata_path = root_path_in_backup / "metadata" / escapeForFileName(database_name_in_backup); + } + + if (!metadata_path && !try_metadata_path.empty() && backup->fileExists(try_metadata_path)) metadata_path = try_metadata_path; - Strings file_names = backup->listFiles(root_path_in_backup / "metadata" / escapeForFileName(database_name_in_backup)); + Strings file_names = backup->listFiles(try_tables_metadata_path); for (const String & file_name : file_names) { - if (!file_name.ends_with(sql_ext)) + if (!file_name.ends_with(".sql")) continue; - String file_name_without_ext = file_name.substr(0, file_name.length() - sql_ext.length()); + String file_name_without_ext = file_name.substr(0, file_name.length() - strlen(".sql")); table_names_in_backup.insert(unescapeForFileName(file_name_without_ext)); } } - if (!metadata_path && throw_if_no_database_metadata_in_backup) + if (!metadata_path && table_names_in_backup.empty()) throw 
Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Database {} not found in backup", backQuoteIfNeed(database_name_in_backup)); if (metadata_path) @@ -456,7 +430,7 @@ void RestorerFromBackup::collectDatabaseInfo(const String & database_name_in_bac read_buffer.reset(); ParserCreateQuery create_parser; ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, create_database_query); + renameDatabaseAndTableNameInCreateQuery(create_database_query, renaming_map, context->getGlobalContext()); String database_name = renaming_map.getNewDatabaseName(database_name_in_backup); DatabaseInfo & database_info = database_infos[database_name]; @@ -472,6 +446,7 @@ void RestorerFromBackup::collectDatabaseInfo(const String & database_name_in_bac } database_info.create_database_query = create_database_query; + database_info.is_predefined_database = DatabaseCatalog::isPredefinedDatabase(database_name); } for (const String & table_name_in_backup : table_names_in_backup) @@ -479,33 +454,26 @@ void RestorerFromBackup::collectDatabaseInfo(const String & database_name_in_bac if (except_table_names.contains({database_name_in_backup, table_name_in_backup})) continue; - collectTableInfo({database_name_in_backup, table_name_in_backup}, /* is_temporary_table= */ false, /* partitions= */ {}); + findTableInBackup({database_name_in_backup, table_name_in_backup}, /* partitions= */ {}); } } -void RestorerFromBackup::collectAllDatabasesInfo(const std::set & except_database_names, const std::set & except_table_names) +void RestorerFromBackup::findEverythingInBackup(const std::set & except_database_names, const std::set & except_table_names) { std::unordered_set database_names_in_backup; - std::unordered_set temporary_table_names_in_backup; for (const auto & root_path_in_backup : root_paths_in_backup) { Strings file_names = backup->listFiles(root_path_in_backup / "metadata"); for (String & file_name : file_names) { - if (file_name.ends_with(sql_ext)) - file_name.resize(file_name.length() - sql_ext.length()); + if (file_name.ends_with(".sql")) + file_name.resize(file_name.length() - strlen(".sql")); database_names_in_backup.emplace(unescapeForFileName(file_name)); } - file_names = backup->listFiles(root_path_in_backup / "temporary_tables" / "metadata"); - for (String & file_name : file_names) - { - if (!file_name.ends_with(sql_ext)) - continue; - file_name.resize(file_name.length() - sql_ext.length()); - temporary_table_names_in_backup.emplace(unescapeForFileName(file_name)); - } + if (backup->hasFiles(root_path_in_backup / "temporary_tables" / "metadata")) + database_names_in_backup.emplace(DatabaseCatalog::TEMPORARY_DATABASE); } for (const String & database_name_in_backup : database_names_in_backup) @@ -513,19 +481,16 @@ void RestorerFromBackup::collectAllDatabasesInfo(const std::set & except if (except_database_names.contains(database_name_in_backup)) continue; - collectDatabaseInfo(database_name_in_backup, except_table_names, /* throw_if_no_database_metadata_in_backup= */ false); + findDatabaseInBackup(database_name_in_backup, except_table_names); } - - for (const String & temporary_table_name_in_backup : temporary_table_names_in_backup) - collectTableInfo({"", temporary_table_name_in_backup}, /* is_temporary_table= */ true, /* partitions= */ {}); } -void RestorerFromBackup::checkAccessForCollectedInfos() const +void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const { 
AccessRightsElements required_access; - for (const auto & database_name : database_infos | boost::adaptors::map_keys) + for (const auto & [database_name, database_info] : database_infos) { - if (DatabaseCatalog::isPredefinedDatabaseName(database_name)) + if (database_info.is_predefined_database) continue; AccessFlags flags; @@ -541,10 +506,20 @@ void RestorerFromBackup::checkAccessForCollectedInfos() const for (const auto & [table_name, table_info] : table_infos) { - if (hasSystemTableEngine(*table_info.create_table_query)) + if (table_info.is_predefined_table) + { + if (isSystemFunctionsTableName(table_name)) + { + /// CREATE_FUNCTION privilege is required to restore the "system.functions" table. + if (!restore_settings.structure_only && table_info.has_data) + required_access.emplace_back(AccessType::CREATE_FUNCTION); + } + /// Privileges required to restore ACL system tables are checked separately + /// (see access_restore_task->getRequiredAccess() below). continue; + } - if (table_name.is_temporary) + if (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) { if (restore_settings.create_table != RestoreTableCreationMode::kMustExist) required_access.emplace_back(AccessType::CREATE_TEMPORARY_TABLE); @@ -564,8 +539,7 @@ void RestorerFromBackup::checkAccessForCollectedInfos() const flags |= AccessType::CREATE_TABLE; } - if (!restore_settings.structure_only && !create.is_dictionary && !create.is_ordinary_view - && backup->hasFiles(table_info.data_path_in_backup)) + if (!restore_settings.structure_only && table_info.has_data) { flags |= AccessType::INSERT; } @@ -578,11 +552,11 @@ void RestorerFromBackup::checkAccessForCollectedInfos() const flags = AccessType::SHOW_TABLES; } - required_access.emplace_back(flags, table_name.name.database, table_name.name.table); + required_access.emplace_back(flags, table_name.database, table_name.table); } - if (access_restore_task) - insertAtEnd(required_access, access_restore_task->getRequiredAccess()); + if (access_restorer) + insertAtEnd(required_access, access_restorer->getRequiredAccess()); /// We convert to AccessRights and back to check access rights in a predictable way /// (some elements could be duplicated or not sorted). @@ -597,7 +571,7 @@ void RestorerFromBackup::createDatabases() for (const auto & [database_name, database_info] : database_infos) { bool need_create_database = (restore_settings.create_database != RestoreDatabaseCreationMode::kMustExist); - if (need_create_database && DatabaseCatalog::isPredefinedDatabaseName(database_name)) + if (database_info.is_predefined_database) need_create_database = false; /// Predefined databases always exist. if (need_create_database) @@ -610,15 +584,18 @@ void RestorerFromBackup::createDatabases() create_database_query->as().if_not_exists = true; } LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query)); - executeCreateQuery(create_database_query); + InterpreterCreateQuery interpreter{create_database_query, context}; + interpreter.setInternal(true); + interpreter.execute(); } DatabasePtr database = DatabaseCatalog::instance().getDatabase(database_name); - if (!restore_settings.allow_different_database_def) + if (!restore_settings.allow_different_database_def && !database_info.is_predefined_database) { /// Check that the database's definition is the same as expected. 
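The access check turns every object found in the backup into a set of required privileges: nothing for predefined objects, a CREATE privilege only when the object will actually be created, INSERT only when data will be restored, and a bare SHOW when the definition merely has to be compared. A simplified bitmask sketch of that accumulation (flag names are illustrative; the real code uses AccessType and AccessRightsElements):

#include <cstdint>

// Illustrative only: simplified privilege accumulation for one table found in a backup.
enum AccessFlag : uint32_t
{
    SHOW_TABLES            = 1u << 0,
    CREATE_TABLE           = 1u << 1,
    CREATE_TEMPORARY_TABLE = 1u << 2,
    INSERT                 = 1u << 3,
};

uint32_t requiredFlagsForTable(bool is_predefined, bool is_temporary, bool will_create, bool will_insert_data)
{
    if (is_predefined)
        return 0;                        // predefined tables are handled separately (e.g. system.functions)
    uint32_t flags = 0;
    if (will_create)
        flags |= is_temporary ? CREATE_TEMPORARY_TABLE : CREATE_TABLE;
    if (will_insert_data)
        flags |= INSERT;
    if (!flags)
        flags = SHOW_TABLES;             // still need to be able to check the existing definition
    return flags;
}
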
- ASTPtr create_database_query = database->getCreateDatabaseQueryForBackup(); + ASTPtr create_database_query = database->getCreateDatabaseQuery(); + adjustCreateQueryForBackup(create_database_query, context->getGlobalContext(), nullptr); ASTPtr expected_create_query = database_info.create_database_query; if (serializeAST(*create_database_query) != serializeAST(*expected_create_query)) { @@ -643,62 +620,62 @@ void RestorerFromBackup::createTables() if (tables_to_create.empty()) break; /// We've already created all the tables. - for (const auto & table_key : tables_to_create) + for (const auto & table_name : tables_to_create) { - auto & table_info = table_infos.at(table_key); + auto & table_info = table_infos.at(table_name); - DatabasePtr database; - if (table_key.is_temporary) - database = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); - else - database = DatabaseCatalog::instance().getDatabase(table_key.name.database); + DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_name.database); bool need_create_table = (restore_settings.create_table != RestoreTableCreationMode::kMustExist); - if (need_create_table && hasSystemTableEngine(*table_info.create_table_query)) - need_create_table = false; /// Tables with System* table engine already exist or can't be created by SQL anyway. + if (table_info.is_predefined_table) + need_create_table = false; /// Predefined tables always exist. if (need_create_table) { - /// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some - /// database-specific things). auto create_table_query = table_info.create_table_query; if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists) { create_table_query = create_table_query->clone(); create_table_query->as().if_not_exists = true; } + LOG_TRACE( log, - "Creating {}table {}: {}", - (table_key.is_temporary ? "temporary " : ""), - table_key.name.getFullName(), + "Creating {}: {}", + tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*create_table_query)); - database->createTableRestoredFromBackup(create_table_query, *this); + /// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some + /// database-specific things). + database->createTableRestoredFromBackup( + create_table_query, + context, + restore_coordination, + std::chrono::duration_cast(create_table_timeout).count()); } table_info.created = true; - auto resolved_id = table_key.is_temporary - ? context->resolveStorageID(StorageID{"", table_key.name.table}, Context::ResolveExternal) - : context->resolveStorageID(StorageID{table_key.name.database, table_key.name.table}, Context::ResolveGlobal); + auto resolved_id = (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) + ? 
context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal) + : context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal); auto storage = database->getTable(resolved_id.table_name, context); table_info.storage = storage; table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); - if (!restore_settings.allow_different_table_def) + if (!restore_settings.allow_different_table_def && !table_info.is_predefined_table) { - ASTPtr create_table_query = storage->getCreateQueryForBackup(context, nullptr); + ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context); + adjustCreateQueryForBackup(create_table_query, context->getGlobalContext(), nullptr); ASTPtr expected_create_query = table_info.create_table_query; if (serializeAST(*create_table_query) != serializeAST(*expected_create_query)) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, - "The {}table {} has a different definition: {} " + "{} has a different definition: {} " "comparing to its definition in the backup: {}", - (table_key.is_temporary ? "temporary " : ""), - table_key.name.getFullName(), + tableNameWithTypeToString(table_name.database, table_name.table, true), serializeAST(*create_table_query), serializeAST(*expected_create_query)); } @@ -708,6 +685,15 @@ void RestorerFromBackup::createTables() { const auto & data_path_in_backup = table_info.data_path_in_backup; const auto & partitions = table_info.partitions; + if (partitions && !storage->supportsBackupPartition()) + { + throw Exception( + ErrorCodes::CANNOT_RESTORE_TABLE, + "Table engine {} doesn't support partitions, cannot restore {}", + storage->getName(), + tableNameWithTypeToString(table_name.database, table_name.table, false)); + } + storage->restoreDataFromBackup(*this, data_path_in_backup, partitions); } } @@ -715,9 +701,9 @@ void RestorerFromBackup::createTables() } /// Returns the list of tables without dependencies or those which dependencies have been created before. -std::vector RestorerFromBackup::findTablesWithoutDependencies() const +std::vector RestorerFromBackup::findTablesWithoutDependencies() const { - std::vector tables_without_dependencies; + std::vector tables_without_dependencies; bool all_tables_created = true; for (const auto & [key, table_info] : table_infos) @@ -732,7 +718,7 @@ std::vector RestorerFromBackup::findTablesWithoutD bool all_dependencies_met = true; for (const auto & dependency : table_info.dependencies) { - auto it = table_infos.find(TableKey{dependency, false}); + auto it = table_infos.find(dependency); if ((it != table_infos.end()) && !it->second.created) { all_dependencies_met = false; @@ -751,7 +737,7 @@ std::vector RestorerFromBackup::findTablesWithoutD return {}; /// Cyclic dependency? We'll try to create those tables anyway but probably it's going to fail. 
- std::vector tables_with_cyclic_dependencies; + std::vector tables_with_cyclic_dependencies; for (const auto & [key, table_info] : table_infos) { if (!table_info.created) @@ -764,7 +750,7 @@ std::vector RestorerFromBackup::findTablesWithoutD "Some tables have cyclic dependency from each other: {}", boost::algorithm::join( tables_with_cyclic_dependencies - | boost::adaptors::transformed([](const TableKey & key) -> String { return key.name.getFullName(); }), + | boost::adaptors::transformed([](const QualifiedTableName & table_name) -> String { return table_name.getFullName(); }), ", ")); return tables_with_cyclic_dependencies; @@ -772,38 +758,52 @@ std::vector RestorerFromBackup::findTablesWithoutD void RestorerFromBackup::addDataRestoreTask(DataRestoreTask && new_task) { - if (current_stage == Stage::kInsertingDataToTables) + if (current_status == kInsertingDataToTablesStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed"); data_restore_tasks.push_back(std::move(new_task)); } void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks) { - if (current_stage == Stage::kInsertingDataToTables) + if (current_status == kInsertingDataToTablesStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed"); insertAtEnd(data_restore_tasks, std::move(new_tasks)); } -void RestorerFromBackup::checkPathInBackupToRestoreAccess(const String & path) +RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks() { - if (!access_restore_task || !access_restore_task->hasDataPath(path)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Path to restore access was not added"); + if (data_restore_tasks.empty()) + return {}; + + LOG_TRACE(log, "Will insert data to tables"); + + /// Storages and table locks must exist while we're executing data restoring tasks. + auto storages = std::make_shared>(); + auto table_locks = std::make_shared>(); + storages->reserve(table_infos.size()); + table_locks->reserve(table_infos.size()); + for (const auto & table_info : table_infos | boost::adaptors::map_values) + { + storages->push_back(table_info.storage); + table_locks->push_back(table_info.table_lock); + } + + DataRestoreTasks res_tasks; + for (const auto & task : data_restore_tasks) + res_tasks.push_back([task, storages, table_locks] { task(); }); + + return res_tasks; } -void RestorerFromBackup::executeCreateQuery(const ASTPtr & create_query) const +std::vector> RestorerFromBackup::getAccessEntitiesToRestore() { - InterpreterCreateQuery interpreter{create_query, context}; - interpreter.setInternal(true); - interpreter.execute(); -} + if (!access_restorer || access_restored) + return {}; -void RestorerFromBackup::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine) -{ - throw Exception( - ErrorCodes::CANNOT_RESTORE_TABLE, - "Table engine {} doesn't support partitions, cannot table {}", - table_engine, - storage_id.getFullTableName()); + /// getAccessEntitiesToRestore() will return entities only when called first time (we don't want to restore the same entities again). 
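createTables() keeps calling findTablesWithoutDependencies(): each pass creates the tables whose dependencies are already created, and when nothing qualifies it falls back to whatever is left to cope with cycles. A standalone sketch of that selection step over a plain dependency map (names are illustrative):

#include <map>
#include <set>
#include <string>
#include <vector>

// Illustrative only: pick the tables which can be created on the current pass.
std::vector<std::string> tablesReadyToCreate(
    const std::map<std::string, std::set<std::string>> & dependencies,
    const std::set<std::string> & already_created)
{
    std::vector<std::string> ready;
    for (const auto & [table, deps] : dependencies)
    {
        if (already_created.contains(table))
            continue;
        bool all_deps_met = true;
        for (const auto & dep : deps)
            if (dependencies.contains(dep) && !already_created.contains(dep))
                all_deps_met = false;    // dependency known but not yet created
        if (all_deps_met)
            ready.push_back(table);
    }
    if (ready.empty())
        for (const auto & [table, deps] : dependencies)  // possible cycle: try the rest anyway
            if (!already_created.contains(table))
                ready.push_back(table);
    return ready;
}
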
+ access_restored = true; + + return access_restorer->getAccessEntities(context->getAccessControl()); } void RestorerFromBackup::throwTableIsNotEmpty(const StorageID & storage_id) diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 65139e0b946..ae2f0c76832 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -15,7 +15,9 @@ class IBackup; using BackupPtr = std::shared_ptr; class IRestoreCoordination; struct StorageID; -class AccessRestoreTask; +class AccessRestorerFromBackup; +struct IAccessEntity; +using AccessEntityPtr = std::shared_ptr; /// Restores the definition of databases and tables and prepares tasks to restore the data of the tables. class RestorerFromBackup : private boost::noncopyable @@ -26,63 +28,38 @@ public: const RestoreSettings & restore_settings_, std::shared_ptr restore_coordination_, const BackupPtr & backup_, - const ContextMutablePtr & context_, - std::chrono::seconds timeout_); + const ContextMutablePtr & context_); ~RestorerFromBackup(); - /// Restores the definition of databases and tables and prepares tasks to restore the data of the tables. - /// restoreMetadata() checks access rights internally so checkAccessRightsOnly() shouldn't be called first. - void restoreMetadata(); + enum Mode + { + /// Restores databases and tables. + RESTORE, - /// Only checks access rights without restoring anything. - void checkAccessOnly(); + /// Only checks access rights without restoring anything. + CHECK_ACCESS_ONLY + }; using DataRestoreTask = std::function; using DataRestoreTasks = std::vector; - DataRestoreTasks getDataRestoreTasks(); + + /// Restores the metadata of databases and tables and returns tasks to restore the data of tables. + DataRestoreTasks run(Mode mode); BackupPtr getBackup() const { return backup; } const RestoreSettings & getRestoreSettings() const { return restore_settings; } bool isNonEmptyTableAllowed() const { return getRestoreSettings().allow_non_empty_tables; } std::shared_ptr getRestoreCoordination() const { return restore_coordination; } - std::chrono::seconds getTimeout() const { return timeout; } ContextMutablePtr getContext() const { return context; } - void executeCreateQuery(const ASTPtr & create_query) const; /// Adds a data restore task which will be later returned by getDataRestoreTasks(). /// This function can be called by implementations of IStorage::restoreFromBackup() in inherited storage classes. void addDataRestoreTask(DataRestoreTask && new_task); void addDataRestoreTasks(DataRestoreTasks && new_tasks); - /// Adds a new data path to restore access control. - void checkPathInBackupToRestoreAccess(const String & path); - - /// Reading a backup includes a few stages: - enum class Stage - { - /// Initial stage. - kPreparing, - - /// Finding databases and tables in the backup which we're going to restore. - kFindingTablesInBackup, - - /// Creating databases or finding them and checking their definitions. - kCreatingDatabases, - - /// Creating tables or finding them and checking their definition. - kCreatingTables, - - /// Inserting restored data to tables. - kInsertingDataToTables, - - /// An error happens during any of the stages above, the backup is not restored properly. - kError = -1, - }; - static std::string_view toString(Stage stage); - - /// Throws an exception that a specified table engine doesn't support partitions. 
- [[noreturn]] static void throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine); + /// Returns the list of access entities to restore. + std::vector> getAccessEntitiesToRestore(); /// Throws an exception that a specified table is already non-empty. [[noreturn]] static void throwTableIsNotEmpty(const StorageID & storage_id); @@ -93,54 +70,56 @@ private: std::shared_ptr restore_coordination; BackupPtr backup; ContextMutablePtr context; - std::chrono::seconds timeout; + std::chrono::milliseconds create_table_timeout; Poco::Logger * log; - Stage current_stage = Stage::kPreparing; - std::vector root_paths_in_backup; + Strings all_hosts; DDLRenamingMap renaming_map; + std::vector root_paths_in_backup; - void run(bool only_check_access); - void setStage(Stage new_stage, const String & error_message = {}); void findRootPathsInBackup(); - void collectDatabaseAndTableInfos(); - void collectTableInfo(const QualifiedTableName & table_name_in_backup, bool is_temporary_table, const std::optional & partitions); - void collectDatabaseInfo(const String & database_name_in_backup, const std::set & except_table_names, bool throw_if_no_database_metadata_in_backup); - void collectAllDatabasesInfo(const std::set & except_database_names, const std::set & except_table_names); - void checkAccessForCollectedInfos() const; + + void findDatabasesAndTablesInBackup(); + void findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions); + void findDatabaseInBackup(const String & database_name_in_backup, const std::set & except_table_names); + void findEverythingInBackup(const std::set & except_database_names, const std::set & except_table_names); + + void checkAccessForObjectsFoundInBackup() const; + void createDatabases(); void createTables(); + DataRestoreTasks getDataRestoreTasks(); + + void setStatus(const String & new_status, const String & message = ""); + struct DatabaseInfo { ASTPtr create_database_query; + bool is_predefined_database = false; }; struct TableInfo { ASTPtr create_table_query; - std::optional partitions; - std::filesystem::path data_path_in_backup; + bool is_predefined_table = false; std::unordered_set dependencies; + bool has_data = false; + std::filesystem::path data_path_in_backup; + std::optional partitions; bool created = false; StoragePtr storage; TableLockHolder table_lock; }; - struct TableKey - { - QualifiedTableName name; - bool is_temporary = false; - bool operator ==(const TableKey & right) const; - bool operator <(const TableKey & right) const; - }; - - std::vector findTablesWithoutDependencies() const; + std::vector findTablesWithoutDependencies() const; + String current_status; std::unordered_map database_infos; - std::map table_infos; + std::map table_infos; std::vector data_restore_tasks; - std::shared_ptr access_restore_task; + std::unique_ptr access_restorer; + bool access_restored = false; }; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 10bdc464ac6..deb3206db31 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,6 +96,10 @@ if (TARGET ch_contrib::rdkafka) add_headers_and_sources(dbms Storages/Kafka) endif() +if (TARGET ch_contrib::nats_io) + add_headers_and_sources(dbms Storages/NATS) +endif() + add_headers_and_sources(dbms Storages/MeiliSearch) if (TARGET ch_contrib::amqp_cpp) @@ -371,6 +375,10 @@ if (TARGET ch_contrib::rdkafka) dbms_target_link_libraries(PRIVATE ch_contrib::rdkafka ch_contrib::cppkafka) endif() +if (TARGET ch_contrib::nats_io) + 
dbms_target_link_libraries(PRIVATE ch_contrib::nats_io) +endif() + if (TARGET ch_contrib::sasl2) dbms_target_link_libraries(PRIVATE ch_contrib::sasl2) endif() diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 396fd97368e..8230c97f49c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1834,9 +1834,21 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) bool ClientBase::processQueryText(const String & text) { - if (exit_strings.end() != exit_strings.find(trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }))) + auto trimmed_input = trim(text, [](char c) { return isWhitespaceASCII(c) || c == ';'; }); + + if (exit_strings.end() != exit_strings.find(trimmed_input)) return false; + if (trimmed_input.starts_with("\\i")) + { + size_t skip_prefix_size = std::strlen("\\i"); + auto file_name = trim( + trimmed_input.substr(skip_prefix_size, trimmed_input.size() - skip_prefix_size), + [](char c) { return isWhitespaceASCII(c); }); + + return processMultiQueryFromFile(file_name); + } + if (!is_multiquery) { assert(!query_fuzzer_runs); @@ -2019,6 +2031,17 @@ void ClientBase::runInteractive() } +bool ClientBase::processMultiQueryFromFile(const String & file_name) +{ + String queries_from_file; + + ReadBufferFromFile in(file_name); + readStringUntilEOF(queries_from_file, in); + + return executeMultiQuery(queries_from_file); +} + + void ClientBase::runNonInteractive() { if (delayed_interactive) @@ -2026,23 +2049,13 @@ void ClientBase::runNonInteractive() if (!queries_files.empty()) { - auto process_multi_query_from_file = [&](const String & file) - { - String queries_from_file; - - ReadBufferFromFile in(file); - readStringUntilEOF(queries_from_file, in); - - return executeMultiQuery(queries_from_file); - }; - for (const auto & queries_file : queries_files) { for (const auto & interleave_file : interleave_queries_files) - if (!process_multi_query_from_file(interleave_file)) + if (!processMultiQueryFromFile(interleave_file)) return; - if (!process_multi_query_from_file(queries_file)) + if (!processMultiQueryFromFile(queries_file)) return; } diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index d34fe282839..ec2267a3be6 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -154,6 +154,7 @@ private: protected: static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context); + bool processMultiQueryFromFile(const String & file_name); bool is_interactive = false; /// Use either interactive line editing interface or batch mode. bool is_multiquery = false; diff --git a/src/Columns/ReverseIndex.h b/src/Columns/ReverseIndex.h index 0b308a81084..3f4427e17ad 100644 --- a/src/Columns/ReverseIndex.h +++ b/src/Columns/ReverseIndex.h @@ -145,10 +145,10 @@ struct ReverseIndexHashTableCell * separately. 
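The new \i shortcut strips the prefix, trims the file name and pushes the file contents through the usual multi-query path. A small standalone sketch of the parsing and file slurping, using std::ifstream instead of ReadBufferFromFile (illustrative only):

#include <fstream>
#include <optional>
#include <sstream>
#include <string>

// Illustrative only: recognize "\i <file>" and return the queries stored in that file.
std::optional<std::string> readQueriesFromIncludeCommand(const std::string & input)
{
    // Trim whitespace and trailing ';', roughly like the client does.
    size_t begin = input.find_first_not_of(" \t\r\n;");
    size_t end = input.find_last_not_of(" \t\r\n;");
    if (begin == std::string::npos)
        return std::nullopt;
    std::string trimmed = input.substr(begin, end - begin + 1);

    if (!trimmed.starts_with("\\i"))
        return std::nullopt;

    std::string file_name = trimmed.substr(2);
    size_t name_begin = file_name.find_first_not_of(" \t");
    if (name_begin == std::string::npos)
        return std::nullopt;
    file_name = file_name.substr(name_begin);

    std::ifstream in(file_name);
    std::ostringstream queries;
    queries << in.rdbuf();                 // read the whole file, as readStringUntilEOF does
    return queries.str();
}
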
*/ template -class ReverseIndexHashTableBase : public HashTable, HashTableAllocator> +class ReverseIndexHashTableBase : public HashTable, HashTableAllocator> { using State = typename Cell::State; - using Base = HashTable, HashTableAllocator>; + using Base = HashTable, HashTableAllocator>; public: using Base::Base; diff --git a/src/Common/Epoll.cpp b/src/Common/Epoll.cpp index d2c16c186ce..9b2589f0589 100644 --- a/src/Common/Epoll.cpp +++ b/src/Common/Epoll.cpp @@ -70,6 +70,9 @@ size_t Epoll::getManyReady(int max_events, epoll_event * events_out, bool blocki if (ready_size == -1 && errno != EINTR) throwFromErrno("Error in epoll_wait", DB::ErrorCodes::EPOLL_ERROR); + + if (errno == EINTR) + LOG_TEST(&Poco::Logger::get("Epoll"), "EINTR"); } while (ready_size <= 0 && (ready_size != 0 || blocking)); diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 6f2ac41cc08..206f2061cde 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -631,8 +631,9 @@ M(660, HDFS_ERROR) \ M(661, CANNOT_SEND_SIGNAL) \ M(662, FS_METADATA_ERROR) \ - M(663, CANNOT_COLLECT_OBJECTS_FOR_BACKUP) \ + M(663, INCONSISTENT_METADATA_FOR_BACKUP) \ M(664, ACCESS_STORAGE_DOESNT_ALLOW_BACKUP) \ + M(665, CANNOT_CONNECT_NATS) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/HashTable/ClearableHashMap.h b/src/Common/HashTable/ClearableHashMap.h index fda01dcf4bc..689f3ff0500 100644 --- a/src/Common/HashTable/ClearableHashMap.h +++ b/src/Common/HashTable/ClearableHashMap.h @@ -14,14 +14,12 @@ struct ClearableHashMapCell : public ClearableHashTableCell, - typename Grower = HashTableGrower<>, - typename Allocator = HashTableAllocator -> + typename Grower = HashTableGrowerWithPrecalculation<>, + typename Allocator = HashTableAllocator> class ClearableHashMap : public HashTable, Hash, Grower, Allocator> { public: diff --git a/src/Common/HashTable/ClearableHashSet.h b/src/Common/HashTable/ClearableHashSet.h index a14490c4551..371302dd8a9 100644 --- a/src/Common/HashTable/ClearableHashSet.h +++ b/src/Common/HashTable/ClearableHashSet.h @@ -48,14 +48,13 @@ struct ClearableHashTableCell : public BaseCell ClearableHashTableCell(const Key & key_, const State & state) : BaseCell(key_, state), version(state.version) {} }; -template -< +template < typename Key, typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, - typename Allocator = HashTableAllocator -> -class ClearableHashSet : public HashTable>, Hash, Grower, Allocator> + typename Grower = HashTableGrowerWithPrecalculation<>, + typename Allocator = HashTableAllocator> +class ClearableHashSet + : public HashTable>, Hash, Grower, Allocator> { public: using Base = HashTable>, Hash, Grower, Allocator>; @@ -68,14 +67,17 @@ public: } }; -template -< +template < typename Key, typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, - typename Allocator = HashTableAllocator -> -class ClearableHashSetWithSavedHash: public HashTable>, Hash, Grower, Allocator> + typename Grower = HashTableGrowerWithPrecalculation<>, + typename Allocator = HashTableAllocator> +class ClearableHashSetWithSavedHash : public HashTable< + Key, + ClearableHashTableCell>, + Hash, + Grower, + Allocator> { public: void clear() @@ -91,8 +93,4 @@ using ClearableHashSetWithStackMemory = ClearableHashSet< Hash, HashTableGrower, HashTableAllocatorWithStackMemory< - (1ULL << initial_size_degree) - * sizeof( - ClearableHashTableCell< - Key, - HashTableCell>)>>; + (1ULL << initial_size_degree) * sizeof(ClearableHashTableCell>)>>; diff --git 
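The loop around epoll_wait treats EINTR as a benign wake-up: it is now logged at the TEST level and simply retried rather than reported as an error. A minimal Linux-only sketch of that retry pattern with plain syscalls, no ClickHouse wrappers:

#include <cerrno>
#include <stdexcept>
#include <sys/epoll.h>

// Illustrative only: wait for events, retrying when the call is interrupted by a signal.
int waitForEvents(int epoll_fd, epoll_event * events_out, int max_events, int timeout_ms)
{
    while (true)
    {
        int ready = epoll_wait(epoll_fd, events_out, max_events, timeout_ms);
        if (ready >= 0)
            return ready;                  // number of ready descriptors (0 on timeout)
        if (errno == EINTR)
            continue;                      // interrupted by a signal: just try again
        throw std::runtime_error("epoll_wait failed");
    }
}
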
a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index 236a6d65707..32fc9a8d76c 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -181,7 +181,7 @@ template < typename Key, typename Cell, typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, + typename Grower = HashTableGrowerWithPrecalculation<>, typename Allocator = HashTableAllocator> class HashMapTable : public HashTable { @@ -296,7 +296,7 @@ template < typename Key, typename Mapped, typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, + typename Grower = HashTableGrowerWithPrecalculation<>, typename Allocator = HashTableAllocator> using HashMap = HashMapTable, Hash, Grower, Allocator>; @@ -305,7 +305,7 @@ template < typename Key, typename Mapped, typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, + typename Grower = HashTableGrowerWithPrecalculation<>, typename Allocator = HashTableAllocator> using HashMapWithSavedHash = HashMapTable, Hash, Grower, Allocator>; diff --git a/src/Common/HashTable/HashSet.h b/src/Common/HashTable/HashSet.h index d06fc761362..279ab167347 100644 --- a/src/Common/HashTable/HashSet.h +++ b/src/Common/HashTable/HashSet.h @@ -16,14 +16,12 @@ */ -template -< +template < typename Key, typename TCell, typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, - typename Allocator = HashTableAllocator -> + typename Grower = HashTableGrowerWithPrecalculation<>, + typename Allocator = HashTableAllocator> class HashSetTable : public HashTable { public: @@ -84,13 +82,11 @@ struct HashSetCellWithSavedHash : public HashTableCell size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } }; -template -< +template < typename Key, typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, - typename Allocator = HashTableAllocator -> + typename Grower = HashTableGrowerWithPrecalculation<>, + typename Allocator = HashTableAllocator> using HashSet = HashSetTable, Hash, Grower, Allocator>; template @@ -102,13 +98,11 @@ using HashSetWithStackMemory = HashSet< (1ULL << initial_size_degree) * sizeof(HashTableCell)>>; -template -< +template < typename Key, typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, - typename Allocator = HashTableAllocator -> + typename Grower = HashTableGrowerWithPrecalculation<>, + typename Allocator = HashTableAllocator> using HashSetWithSavedHash = HashSetTable, Hash, Grower, Allocator>; template diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 7db693ddcec..e8a204c4043 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -226,6 +226,7 @@ void insertSetMapped(MappedType & dest, const ValueType & src) { dest = src.seco /** Determines the size of the hash table, and when and how much it should be resized. + * Has very small state (one UInt8) and useful for Set-s allocated in automatic memory (see uniqExact as an example). */ template struct HashTableGrower @@ -275,6 +276,68 @@ struct HashTableGrower } }; +/** Determines the size of the hash table, and when and how much it should be resized. + * This structure is aligned to cache line boundary and also occupies it all. + * Precalculates some values to speed up lookups and insertion into the HashTable (and thus has bigger memory footprint than HashTableGrower). + */ +template +class alignas(64) HashTableGrowerWithPrecalculation +{ + /// The state of this structure is enough to get the buffer size of the hash table. 
+ + UInt8 size_degree = initial_size_degree; + size_t precalculated_mask = (1ULL << initial_size_degree) - 1; + size_t precalculated_max_fill = 1ULL << (initial_size_degree - 1); + +public: + UInt8 sizeDegree() const { return size_degree; } + + void increaseSizeDegree(UInt8 delta) + { + size_degree += delta; + precalculated_mask = (1ULL << size_degree) - 1; + precalculated_max_fill = 1ULL << (size_degree - 1); + } + + static constexpr auto initial_count = 1ULL << initial_size_degree; + + /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements. + static constexpr auto performs_linear_probing_with_single_step = true; + + /// The size of the hash table in the cells. + size_t bufSize() const { return 1ULL << size_degree; } + + /// From the hash value, get the cell number in the hash table. + size_t place(size_t x) const { return x & precalculated_mask; } + + /// The next cell in the collision resolution chain. + size_t next(size_t pos) const { return (pos + 1) & precalculated_mask; } + + /// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it. + bool overflow(size_t elems) const { return elems > precalculated_max_fill; } + + /// Increase the size of the hash table. + void increaseSize() { increaseSizeDegree(size_degree >= 23 ? 1 : 2); } + + /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table. + void set(size_t num_elems) + { + size_degree = num_elems <= 1 + ? initial_size_degree + : ((initial_size_degree > static_cast(log2(num_elems - 1)) + 2) + ? initial_size_degree + : (static_cast(log2(num_elems - 1)) + 2)); + increaseSizeDegree(0); + } + + void setBufSize(size_t buf_size_) + { + size_degree = static_cast(log2(buf_size_ - 1) + 1); + increaseSizeDegree(0); + } +}; + +static_assert(sizeof(HashTableGrowerWithPrecalculation<>) == 64); /** When used as a Grower, it turns a hash table into something like a lookup table. * It remains non-optimal - the cells store the keys. 
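A minimal standalone sketch of the arithmetic behind the precalculated fields above (uses only the standard library; the concrete size_degree value is chosen only for illustration):

#include <cassert>
#include <cstdint>

int main()
{
    uint8_t size_degree = 8;                                // buffer of 1 << 8 = 256 cells
    size_t precalculated_mask = (1ULL << size_degree) - 1;
    size_t precalculated_max_fill = 1ULL << (size_degree - 1);

    size_t some_hash = 0x1234ABCD;
    assert((some_hash & precalculated_mask) < 256);          // place(): cell index without a modulo
    assert(((255 + 1) & precalculated_mask) == 0);           // next(): linear probing wraps to cell 0
    assert(precalculated_max_fill == 128);                   // overflow(): resize once half the cells are used
    return 0;
}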
diff --git a/src/Common/HashTable/LRUHashMap.h b/src/Common/HashTable/LRUHashMap.h index 95ac5a1932e..68b585cf553 100644 --- a/src/Common/HashTable/LRUHashMap.h +++ b/src/Common/HashTable/LRUHashMap.h @@ -78,20 +78,20 @@ struct LRUHashMapCellNodeTraits }; template -class LRUHashMapImpl : - private HashMapTable< - TKey, - LRUHashMapCell, - Hash, - HashTableGrower<>, - HashTableAllocator> +class LRUHashMapImpl : private HashMapTable< + TKey, + LRUHashMapCell, + Hash, + HashTableGrowerWithPrecalculation<>, + HashTableAllocator> { using Base = HashMapTable< TKey, LRUHashMapCell, Hash, - HashTableGrower<>, + HashTableGrowerWithPrecalculation<>, HashTableAllocator>; + public: using Key = TKey; using Value = TValue; diff --git a/src/Common/HashTable/StringHashTable.h b/src/Common/HashTable/StringHashTable.h index 6a8bdc06218..8d15fde4ce0 100644 --- a/src/Common/HashTable/StringHashTable.h +++ b/src/Common/HashTable/StringHashTable.h @@ -150,10 +150,10 @@ public: }; template -struct StringHashTableGrower : public HashTableGrower +struct StringHashTableGrower : public HashTableGrowerWithPrecalculation { // Smooth growing for string maps - void increaseSize() { this->size_degree += 1; } + void increaseSize() { this->increaseSizeDegree(1); } }; template diff --git a/src/Common/HashTable/TwoLevelHashTable.h b/src/Common/HashTable/TwoLevelHashTable.h index 35c224c53f8..27cc075acd7 100644 --- a/src/Common/HashTable/TwoLevelHashTable.h +++ b/src/Common/HashTable/TwoLevelHashTable.h @@ -15,13 +15,10 @@ */ template -struct TwoLevelHashTableGrower : public HashTableGrower +struct TwoLevelHashTableGrower : public HashTableGrowerWithPrecalculation { /// Increase the size of the hash table. - void increaseSize() - { - this->size_degree += this->size_degree >= 15 ? 1 : 2; - } + void increaseSize() { this->increaseSizeDegree(this->sizeDegree() >= 15 ? 1 : 2); } }; template diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index a82115a9923..b8f8a9d3a88 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -826,66 +826,44 @@ using UTF8CaseInsensitiveStringSearcher = StringSearcher; using ASCIICaseSensitiveTokenSearcher = TokenSearcher; using ASCIICaseInsensitiveTokenSearcher = TokenSearcher; - -/** Uses functions from libc. - * It makes sense to use only with short haystacks when cheap initialization is required. - * There is no option for case-insensitive search for UTF-8 strings. - * It is required that strings are zero-terminated. - */ - -struct LibCASCIICaseSensitiveStringSearcher : public StringSearcherBase +/// Use only with short haystacks where cheap initialization is required. 
+template +struct StdLibASCIIStringSearcher : public StringSearcherBase { - const char * const needle; + const char * const needle_start; + const char * const needle_end; template requires (sizeof(CharT) == 1) - LibCASCIICaseSensitiveStringSearcher(const CharT * const needle_, const size_t /* needle_size */) - : needle(reinterpret_cast(needle_)) {} + StdLibASCIIStringSearcher(const CharT * const needle_start_, const size_t needle_size_) + : needle_start{reinterpret_cast(needle_start_)} + , needle_end{reinterpret_cast(needle_start) + needle_size_} + {} template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const + const CharT * search(const CharT * haystack_start, const CharT * const haystack_end) const { - const auto * res = strstr(reinterpret_cast(haystack), reinterpret_cast(needle)); - if (!res) - return haystack_end; - return reinterpret_cast(res); + if constexpr (CaseInsensitive) + { + return std::search( + haystack_start, haystack_end, needle_start, needle_end, + [](char c1, char c2) {return std::toupper(c1) == std::toupper(c2);}); + } + else + { + return std::search( + haystack_start, haystack_end, needle_start, needle_end, + [](char c1, char c2) {return c1 == c2;}); + } } template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const + const CharT * search(const CharT * haystack_start, const size_t haystack_length) const { - return search(haystack, haystack + haystack_size); + return search(haystack_start, haystack_start + haystack_length); } }; -struct LibCASCIICaseInsensitiveStringSearcher : public StringSearcherBase -{ - const char * const needle; - - template - requires (sizeof(CharT) == 1) - LibCASCIICaseInsensitiveStringSearcher(const CharT * const needle_, const size_t /* needle_size */) - : needle(reinterpret_cast(needle_)) {} - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const - { - const auto * res = strcasestr(reinterpret_cast(haystack), reinterpret_cast(needle)); - if (!res) - return haystack_end; - return reinterpret_cast(res); - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const - { - return search(haystack, haystack + haystack_size); - } -}; - - } diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp index a7c74dab8be..1301ebce0ba 100644 --- a/src/Common/TimerDescriptor.cpp +++ b/src/Common/TimerDescriptor.cpp @@ -6,6 +6,8 @@ #include #include +#include + namespace DB { @@ -70,6 +72,8 @@ void TimerDescriptor::drain() const if (errno != EINTR) throwFromErrno("Cannot drain timer_fd", ErrorCodes::CANNOT_READ_FROM_SOCKET); + else + LOG_TEST(&Poco::Logger::get("TimerDescriptor"), "EINTR"); } } } diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 73c7da25a8b..79f9943cb57 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -281,6 +281,13 @@ struct SetResponse : virtual Response size_t bytesSize() const override { return sizeof(stat); } }; +enum class ListRequestType : uint8_t +{ + ALL, + PERSISTENT_ONLY, + EPHEMERAL_ONLY +}; + struct ListRequest : virtual Request { String path; @@ -492,6 +499,7 @@ public: virtual void list( const String & path, + ListRequestType list_request_type, ListCallback callback, WatchCallback watch) = 0; diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp 
index 3d2d5fcb667..3af5dfcc177 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -1,3 +1,4 @@ +#include "Common/ZooKeeper/IKeeper.h" #include #include #include @@ -119,12 +120,17 @@ struct TestKeeperSetRequest final : SetRequest, TestKeeperRequest } }; -struct TestKeeperListRequest final : ListRequest, TestKeeperRequest +struct TestKeeperListRequest : ListRequest, TestKeeperRequest { ResponsePtr createResponse() const override; std::pair process(TestKeeper::Container & container, int64_t zxid) const override; }; +struct TestKeeperFilteredListRequest final : TestKeeperListRequest +{ + ListRequestType list_request_type; +}; + struct TestKeeperCheckRequest final : CheckRequest, TestKeeperRequest { TestKeeperCheckRequest() = default; @@ -390,8 +396,18 @@ std::pair TestKeeperListRequest::process(TestKeeper::Containe child_it != container.end() && startsWith(child_it->first, path_prefix); ++child_it) { + using enum ListRequestType; if (parentPath(child_it->first) == path) - response.names.emplace_back(baseName(child_it->first)); + { + ListRequestType list_request_type = ALL; + if (const auto * filtered_list = dynamic_cast(this)) + list_request_type = filtered_list->list_request_type; + + const auto is_ephemeral = child_it->second.stat.ephemeralOwner != 0; + if (list_request_type == ALL || (is_ephemeral && list_request_type == EPHEMERAL_ONLY) + || (!is_ephemeral && list_request_type == PERSISTENT_ONLY)) + response.names.emplace_back(baseName(child_it->first)); + } } response.stat = it->second.stat; @@ -768,11 +784,13 @@ void TestKeeper::set( void TestKeeper::list( const String & path, + ListRequestType list_request_type, ListCallback callback, WatchCallback watch) { - TestKeeperListRequest request; + TestKeeperFilteredListRequest request; request.path = path; + request.list_request_type = list_request_type; RequestInfo request_info; request_info.request = std::make_shared(std::move(request)); diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 40cac3094f1..6e77a5d38c1 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -71,6 +71,7 @@ public: void list( const String & path, + ListRequestType list_request_type, ListCallback callback, WatchCallback watch) override; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index c8ae6b72c3e..5a0be0f76ff 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -9,6 +9,7 @@ #include #include #include +#include "Common/ZooKeeper/IKeeper.h" #include #include #include @@ -312,9 +313,10 @@ static Coordination::WatchCallback callbackForEvent(const EventPtr & watch) Coordination::Error ZooKeeper::getChildrenImpl(const std::string & path, Strings & res, Coordination::Stat * stat, - Coordination::WatchCallback watch_callback) + Coordination::WatchCallback watch_callback, + Coordination::ListRequestType list_request_type) { - auto future_result = asyncTryGetChildrenNoThrow(path, watch_callback); + auto future_result = asyncTryGetChildrenNoThrow(path, watch_callback, list_request_type); if (future_result.wait_for(std::chrono::milliseconds(operation_timeout_ms)) != std::future_status::ready) { @@ -335,26 +337,28 @@ Coordination::Error ZooKeeper::getChildrenImpl(const std::string & path, Strings } } -Strings ZooKeeper::getChildren( - const std::string & path, Coordination::Stat * stat, const EventPtr & watch) +Strings ZooKeeper::getChildren(const std::string & path, Coordination::Stat * 
stat, const EventPtr & watch) { Strings res; check(tryGetChildren(path, res, stat, watch), path); return res; } -Strings ZooKeeper::getChildrenWatch( - const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) +Strings ZooKeeper::getChildrenWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) { Strings res; check(tryGetChildrenWatch(path, res, stat, watch_callback), path); return res; } -Coordination::Error ZooKeeper::tryGetChildren(const std::string & path, Strings & res, - Coordination::Stat * stat, const EventPtr & watch) +Coordination::Error ZooKeeper::tryGetChildren( + const std::string & path, + Strings & res, + Coordination::Stat * stat, + const EventPtr & watch, + Coordination::ListRequestType list_request_type) { - Coordination::Error code = getChildrenImpl(path, res, stat, callbackForEvent(watch)); + Coordination::Error code = getChildrenImpl(path, res, stat, callbackForEvent(watch), list_request_type); if (!(code == Coordination::Error::ZOK || code == Coordination::Error::ZNONODE)) throw KeeperException(code, path); @@ -362,10 +366,14 @@ Coordination::Error ZooKeeper::tryGetChildren(const std::string & path, Strings return code; } -Coordination::Error ZooKeeper::tryGetChildrenWatch(const std::string & path, Strings & res, - Coordination::Stat * stat, Coordination::WatchCallback watch_callback) +Coordination::Error ZooKeeper::tryGetChildrenWatch( + const std::string & path, + Strings & res, + Coordination::Stat * stat, + Coordination::WatchCallback watch_callback, + Coordination::ListRequestType list_request_type) { - Coordination::Error code = getChildrenImpl(path, res, stat, watch_callback); + Coordination::Error code = getChildrenImpl(path, res, stat, watch_callback, list_request_type); if (!(code == Coordination::Error::ZOK || code == Coordination::Error::ZNONODE)) throw KeeperException(code, path); @@ -1046,7 +1054,8 @@ std::future ZooKeeper::asyncTrySetNoThrow(const std:: return future; } -std::future ZooKeeper::asyncGetChildren(const std::string & path, Coordination::WatchCallback watch_callback) +std::future ZooKeeper::asyncGetChildren( + const std::string & path, Coordination::WatchCallback watch_callback, Coordination::ListRequestType list_request_type) { auto promise = std::make_shared>(); auto future = promise->get_future(); @@ -1059,11 +1068,12 @@ std::future ZooKeeper::asyncGetChildren(const std::s promise->set_value(response); }; - impl->list(path, std::move(callback), watch_callback); + impl->list(path, list_request_type, std::move(callback), watch_callback); return future; } -std::future ZooKeeper::asyncTryGetChildrenNoThrow(const std::string & path, Coordination::WatchCallback watch_callback) +std::future ZooKeeper::asyncTryGetChildrenNoThrow( + const std::string & path, Coordination::WatchCallback watch_callback, Coordination::ListRequestType list_request_type) { auto promise = std::make_shared>(); auto future = promise->get_future(); @@ -1073,7 +1083,7 @@ std::future ZooKeeper::asyncTryGetChildrenNoThrow(co promise->set_value(response); }; - impl->list(path, std::move(callback), watch_callback); + impl->list(path, list_request_type, std::move(callback), watch_callback); return future; } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 6aebccd2b4e..d2f92b6b4c3 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -194,11 +194,13 @@ public: /// * The node doesn't exist. 
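A hedged usage sketch of the extended children-listing API introduced here; the client object and the path are placeholders, not taken from this diff:

// `zookeeper` is assumed to be an already initialised zkutil::ZooKeeper client.
Strings ephemeral_children;
Coordination::Error code = zookeeper->tryGetChildren(
    "/example/path",                                     // placeholder path
    ephemeral_children,
    /* stat */ nullptr,
    /* watch */ nullptr,
    Coordination::ListRequestType::EPHEMERAL_ONLY);      // new argument, defaults to ALL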
Coordination::Error tryGetChildren(const std::string & path, Strings & res, Coordination::Stat * stat = nullptr, - const EventPtr & watch = nullptr); + const EventPtr & watch = nullptr, + Coordination::ListRequestType list_request_type = Coordination::ListRequestType::ALL); Coordination::Error tryGetChildrenWatch(const std::string & path, Strings & res, Coordination::Stat * stat, - Coordination::WatchCallback watch_callback); + Coordination::WatchCallback watch_callback, + Coordination::ListRequestType list_request_type = Coordination::ListRequestType::ALL); /// Performs several operations in a transaction. /// Throws on every error. @@ -279,9 +281,15 @@ public: FutureExists asyncTryExistsNoThrow(const std::string & path, Coordination::WatchCallback watch_callback = {}); using FutureGetChildren = std::future; - FutureGetChildren asyncGetChildren(const std::string & path, Coordination::WatchCallback watch_callback = {}); + FutureGetChildren asyncGetChildren( + const std::string & path, + Coordination::WatchCallback watch_callback = {}, + Coordination::ListRequestType list_request_type = Coordination::ListRequestType::ALL); /// Like the previous one but don't throw any exceptions on future.get() - FutureGetChildren asyncTryGetChildrenNoThrow(const std::string & path, Coordination::WatchCallback watch_callback = {}); + FutureGetChildren asyncTryGetChildrenNoThrow( + const std::string & path, + Coordination::WatchCallback watch_callback = {}, + Coordination::ListRequestType list_request_type = Coordination::ListRequestType::ALL); using FutureSet = std::future; FutureSet asyncSet(const std::string & path, const std::string & data, int32_t version = -1); @@ -335,7 +343,11 @@ private: const std::string & path, std::string & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback); Coordination::Error setImpl(const std::string & path, const std::string & data, int32_t version, Coordination::Stat * stat); Coordination::Error getChildrenImpl( - const std::string & path, Strings & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback); + const std::string & path, + Strings & res, + Coordination::Stat * stat, + Coordination::WatchCallback watch_callback, + Coordination::ListRequestType list_request_type); Coordination::Error multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses); Coordination::Error existsImpl(const std::string & path, Coordination::Stat * stat_, Coordination::WatchCallback watch_callback); Coordination::Error syncImpl(const std::string & path, std::string & returned_path); diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index de2fb630848..837ea5bbad8 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -1,3 +1,4 @@ +#include "Common/ZooKeeper/IKeeper.h" #include #include #include @@ -298,6 +299,32 @@ std::string ZooKeeperListRequest::toStringImpl() const return fmt::format("path = {}", path); } +void ZooKeeperFilteredListRequest::writeImpl(WriteBuffer & out) const +{ + Coordination::write(path, out); + Coordination::write(has_watch, out); + Coordination::write(static_cast(list_request_type), out); +} + +void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in) +{ + Coordination::read(path, in); + Coordination::read(has_watch, in); + + uint8_t read_request_type{0}; + Coordination::read(read_request_type, in); + list_request_type = static_cast(read_request_type); +} + +std::string 
ZooKeeperFilteredListRequest::toStringImpl() const +{ + return fmt::format( + "path = {}\n" + "list_request_type = {}", + path, + list_request_type); +} + void ZooKeeperListResponse::readImpl(ReadBuffer & in) { Coordination::read(names, in); diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index c7bfbe95b74..09f797fb47b 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -347,6 +347,18 @@ struct ZooKeeperSimpleListRequest final : ZooKeeperListRequest OpNum getOpNum() const override { return OpNum::SimpleList; } }; +struct ZooKeeperFilteredListRequest final : ZooKeeperListRequest +{ + ListRequestType list_request_type{ListRequestType::ALL}; + + OpNum getOpNum() const override { return OpNum::FilteredList; } + void writeImpl(WriteBuffer & out) const override; + void readImpl(ReadBuffer & in) override; + std::string toStringImpl() const override; + + size_t bytesSize() const override { return ZooKeeperListRequest::bytesSize() + sizeof(list_request_type); } +}; + struct ZooKeeperListResponse : ListResponse, ZooKeeperResponse { void readImpl(ReadBuffer & in) override; diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.cpp b/src/Common/ZooKeeper/ZooKeeperConstants.cpp index b0a05fe6c8d..5b121ed6138 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.cpp +++ b/src/Common/ZooKeeper/ZooKeeperConstants.cpp @@ -64,6 +64,8 @@ std::string toString(OpNum op_num) return "SetACL"; case OpNum::GetACL: return "GetACL"; + case OpNum::FilteredList: + return "FilteredList"; } int32_t raw_op = static_cast(op_num); throw Exception("Operation " + std::to_string(raw_op) + " is unknown", Error::ZUNIMPLEMENTED); diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index 1ed2c442f6c..44f8437f12c 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -32,6 +32,10 @@ enum class OpNum : int32_t Check = 13, Multi = 14, Auth = 100, + + // CH Keeper specific operations + FilteredList = 500, + SessionID = 997, /// Special internal request }; diff --git a/src/Common/ZooKeeper/ZooKeeperIO.cpp b/src/Common/ZooKeeper/ZooKeeperIO.cpp index 066aa1a24f6..c84a8624d78 100644 --- a/src/Common/ZooKeeper/ZooKeeperIO.cpp +++ b/src/Common/ZooKeeper/ZooKeeperIO.cpp @@ -28,6 +28,11 @@ void write(int32_t x, WriteBuffer & out) writeBinary(x, out); } +void write(uint8_t x, WriteBuffer & out) +{ + writeBinary(x, out); +} + void write(OpNum x, WriteBuffer & out) { write(static_cast(x), out); @@ -91,6 +96,11 @@ void read(int64_t & x, ReadBuffer & in) x = __builtin_bswap64(x); } +void read(uint8_t & x, ReadBuffer & in) +{ + readBinary(x, in); +} + void read(int32_t & x, ReadBuffer & in) { readBinary(x, in); diff --git a/src/Common/ZooKeeper/ZooKeeperIO.h b/src/Common/ZooKeeper/ZooKeeperIO.h index c2c6149cd11..ec77b46f3d9 100644 --- a/src/Common/ZooKeeper/ZooKeeperIO.h +++ b/src/Common/ZooKeeper/ZooKeeperIO.h @@ -22,6 +22,7 @@ void write(uint64_t x, WriteBuffer & out); void write(int64_t x, WriteBuffer & out); void write(int32_t x, WriteBuffer & out); +void write(uint8_t x, WriteBuffer & out); void write(OpNum x, WriteBuffer & out); void write(bool x, WriteBuffer & out); void write(const std::string & s, WriteBuffer & out); @@ -50,6 +51,7 @@ void read(uint64_t & x, ReadBuffer & in); #endif void read(int64_t & x, ReadBuffer & in); void read(int32_t & x, ReadBuffer & in); +void read(uint8_t & x, ReadBuffer & in); void read(OpNum & x, ReadBuffer & in); void 
read(bool & x, ReadBuffer & in); void read(int8_t & x, ReadBuffer & in); diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index bd284ed0c91..8fa6f28c29c 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1168,11 +1168,13 @@ void ZooKeeper::set( void ZooKeeper::list( const String & path, + ListRequestType list_request_type, ListCallback callback, WatchCallback watch) { - ZooKeeperListRequest request; + ZooKeeperFilteredListRequest request; request.path = path; + request.list_request_type = list_request_type; RequestInfo request_info; request_info.request = std::make_shared(std::move(request)); diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index c4acaf8d1ee..aa27b0eefe9 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -164,6 +164,7 @@ public: void list( const String & path, + ListRequestType list_request_type, ListCallback callback, WatchCallback watch) override; diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp index 35fb471f07c..b06ee5a666e 100644 --- a/src/Common/tests/gtest_hash_table.cpp +++ b/src/Common/tests/gtest_hash_table.cpp @@ -37,7 +37,7 @@ std::set convertToSet(const HashTable & table) TEST(HashTable, Insert) { - using Cont = HashSet, HashTableGrower<1>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<1>>; Cont cont; @@ -49,7 +49,7 @@ TEST(HashTable, Insert) TEST(HashTable, Emplace) { - using Cont = HashSet, HashTableGrower<1>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<1>>; Cont cont; @@ -70,7 +70,7 @@ TEST(HashTable, Emplace) TEST(HashTable, Lookup) { - using Cont = HashSet, HashTableGrower<1>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<1>>; Cont cont; @@ -89,7 +89,7 @@ TEST(HashTable, Lookup) TEST(HashTable, Iteration) { - using Cont = HashSet, HashTableGrower<1>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<1>>; Cont cont; @@ -107,7 +107,7 @@ TEST(HashTable, Erase) { { /// Check zero element deletion - using Cont = HashSet, HashTableGrower<4>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<4>>; Cont cont; cont.insert(0); @@ -119,7 +119,7 @@ TEST(HashTable, Erase) ASSERT_TRUE(cont.find(0) == nullptr); } { - using Cont = HashSet, HashTableGrower<4>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<4>>; Cont cont; /// [.(1)..............] erase of (1). @@ -132,7 +132,7 @@ TEST(HashTable, Erase) ASSERT_TRUE(cont.find(1) == nullptr); } { - using Cont = HashSet, HashTableGrower<4>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<4>>; Cont cont; /// [.(1)(2)(3)............] erase of (1) does not break search for (2) (3). @@ -152,7 +152,7 @@ TEST(HashTable, Erase) ASSERT_EQ(cont.size(), 0); } { - using Cont = HashSet, HashTableGrower<4>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<4>>; Cont cont; /// [.(1)(17).............] erase of (1) breaks search for (17) because their natural position is 1. @@ -164,7 +164,7 @@ TEST(HashTable, Erase) ASSERT_TRUE(cont.find(17) != nullptr && cont.find(17)->getKey() == 17); } { - using Cont = HashSet, HashTableGrower<4>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<4>>; Cont cont; /// [.(1)(2)(3)(17)...........] erase of (2) breaks search for (17) because their natural position is 1. 
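A standalone toy illustration (not ClickHouse code) of why the erase scenarios in these tests require moving elements: with linear probing, simply clearing a cell would break the collision chain for keys that probed past it.

#include <array>
#include <cassert>
#include <cstddef>
#include <optional>

int main()
{
    std::array<std::optional<int>, 16> cells;            // 16-cell table with an identity hash
    auto place = [](int x) { return static_cast<size_t>(x) & 15; };
    auto insert = [&](int x)
    {
        size_t pos = place(x);
        while (cells[pos].has_value())
            pos = (pos + 1) & 15;                         // linear probing with single step
        cells[pos] = x;
    };

    insert(1);
    insert(17);                                           // natural position is also cell 1, lands in cell 2
    assert(cells[1] == 1 && cells[2] == 17);

    cells[1].reset();                                     // naive erase: leaves a hole at cell 1
    // A lookup of 17 that stops at the first empty cell now fails, which is why
    // the real table shifts 17 back into cell 1 instead of leaving a hole.
    assert(!cells[1].has_value() && cells[2] == 17);
    return 0;
}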
@@ -181,7 +181,7 @@ TEST(HashTable, Erase) ASSERT_TRUE(cont.find(17) != nullptr && cont.find(17)->getKey() == 17); } { - using Cont = HashSet, HashTableGrower<4>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<4>>; Cont cont; /// [(16)(30)............(14)(15)] erase of (16) breaks search for (30) because their natural position is 14. @@ -197,7 +197,7 @@ TEST(HashTable, Erase) ASSERT_TRUE(cont.find(30) != nullptr && cont.find(30)->getKey() == 30); } { - using Cont = HashSet, HashTableGrower<4>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<4>>; Cont cont; /// [(16)(30)............(14)(15)] erase of (15) breaks search for (30) because their natural position is 14. @@ -213,7 +213,7 @@ TEST(HashTable, Erase) ASSERT_TRUE(cont.find(30) != nullptr && cont.find(30)->getKey() == 30); } { - using Cont = HashSet, HashTableGrower<1>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<1>>; Cont cont; for (size_t i = 0; i < 5000; ++i) @@ -249,7 +249,7 @@ TEST(HashTable, SerializationDeserialization) { { /// Use dummy hash to make it reproducible if default hash implementation will be changed - using Cont = HashSet, HashTableGrower<1>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<1>>; Cont cont; @@ -271,7 +271,7 @@ TEST(HashTable, SerializationDeserialization) ASSERT_EQ(convertToSet(cont), convertToSet(deserialized)); } { - using Cont = HashSet, HashTableGrower<1>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<1>>; Cont cont; @@ -289,7 +289,7 @@ TEST(HashTable, SerializationDeserialization) ASSERT_EQ(convertToSet(cont), convertToSet(deserialized)); } { - using Cont = HashSet, HashTableGrower<1>>; + using Cont = HashSet, HashTableGrowerWithPrecalculation<1>>; Cont cont; WriteBufferFromOwnString wb; diff --git a/src/Coordination/Keeper4LWInfo.h b/src/Coordination/Keeper4LWInfo.h index bf7267a68e2..7d90152611e 100644 --- a/src/Coordination/Keeper4LWInfo.h +++ b/src/Coordination/Keeper4LWInfo.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB { @@ -42,7 +43,7 @@ struct Keeper4LWInfo if (is_follower) return "follower"; - throw Exception(ErrorCodes::LOGICAL_ERROR, "RAFT server has undefined state state, it's a bug"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "RAFT server has undefined state, it's a bug"); } }; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 9ad5fe9e8ed..213c924af6b 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -651,12 +651,7 @@ uint64_t KeeperDispatcher::getSnapDirSize() const Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const { - Keeper4LWInfo result; - result.is_follower = server->isFollower(); - result.is_standalone = !result.is_follower && server->getFollowerCount() == 0; - result.is_leader = isLeader(); - result.is_observer = server->isObserver(); - result.has_leader = hasLeader(); + Keeper4LWInfo result = server->getPartiallyFilled4LWInfo(); { std::lock_guard lock(push_request_mutex); result.outstanding_requests_count = requests_queue->size(); @@ -665,13 +660,6 @@ Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const std::lock_guard lock(session_to_response_callback_mutex); result.alive_connections_count = session_to_response_callback.size(); } - if (result.is_leader) - { - result.follower_count = server->getFollowerCount(); - result.synced_follower_count = server->getSyncedFollowerCount(); - } - result.total_nodes_count = server->getKeeperStateMachine()->getNodesCount(); - result.last_zxid = 
server->getKeeperStateMachine()->getLastProcessedZxid(); return result; } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index b632327a165..5e2701299f4 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -150,6 +150,11 @@ public: return server->isLeader(); } + bool isFollower() const + { + return server->isFollower(); + } + bool hasLeader() const { return server->isLeaderAlive(); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 8a46d8ee296..2a3d17af403 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -444,14 +444,12 @@ bool KeeperServer::isLeader() const return raft_instance->is_leader(); } - bool KeeperServer::isObserver() const { auto srv_config = state_manager->get_srv_config(); return srv_config->is_learner(); } - bool KeeperServer::isFollower() const { return !isLeader() && !isObserver(); @@ -806,4 +804,25 @@ bool KeeperServer::waitConfigurationUpdate(const ConfigUpdateAction & task) return true; } +Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const +{ + Keeper4LWInfo result; + result.is_leader = raft_instance->is_leader(); + + auto srv_config = state_manager->get_srv_config(); + result.is_observer = srv_config->is_learner(); + + result.is_follower = !result.is_leader && !result.is_observer; + result.has_leader = result.is_leader || isLeaderAlive(); + result.is_standalone = !result.is_follower && getFollowerCount() == 0; + if (result.is_leader) + { + result.follower_count = getFollowerCount(); + result.synced_follower_count = getSyncedFollowerCount(); + } + result.total_nodes_count = getKeeperStateMachine()->getNodesCount(); + result.last_zxid = getKeeperStateMachine()->getLastProcessedZxid(); + return result; +} + } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 8c21cf47d94..f6524ce97a1 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB { @@ -95,6 +96,8 @@ public: bool isLeaderAlive() const; + Keeper4LWInfo getPartiallyFilled4LWInfo() const; + /// @return follower count if node is not leader return 0 uint64_t getFollowerCount() const; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 368b23f34d2..1b399e8cc92 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -289,6 +289,9 @@ void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data) if (!request_for_session.zxid) request_for_session.zxid = log_idx; + if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) + return; + std::lock_guard lock(storage_and_responses_lock); storage->rollbackRequest(request_for_session.zxid); } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 21265f0bd61..fd1fab5b6b0 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "Common/ZooKeeper/ZooKeeperCommon.h" #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include namespace DB { @@ -1161,6 +1163,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc } auto & container = storage.container; + auto node_it = container.find(request.path); if (node_it == container.end()) { @@ -1178,8 +1181,31 @@ struct KeeperStorageListRequestProcessor final 
: public KeeperStorageRequestProc const auto & children = node_it->value.getChildren(); response.names.reserve(children.size()); + const auto add_child = [&](const auto child) + { + using enum Coordination::ListRequestType; + + auto list_request_type = ALL; + if (auto * filtered_list = dynamic_cast(&request)) + list_request_type = filtered_list->list_request_type; + + if (list_request_type == ALL) + return true; + + auto child_path = (std::filesystem::path(request.path) / child.toView()).generic_string(); + auto child_it = container.find(child_path); + if (child_it == container.end()) + onStorageInconsistency(); + + const auto is_ephemeral = child_it->value.stat.ephemeralOwner != 0; + return (is_ephemeral && list_request_type == EPHEMERAL_ONLY) || (!is_ephemeral && list_request_type == PERSISTENT_ONLY); + }; + for (const auto child : children) - response.names.push_back(child.toString()); + { + if (add_child(child)) + response.names.push_back(child.toString()); + } response.stat = node_it->value.stat; response.error = Coordination::Error::ZOK; @@ -1623,7 +1649,7 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc void KeeperStorage::finalize() { if (finalized) - throw DB::Exception("Testkeeper storage already finalized", ErrorCodes::LOGICAL_ERROR); + throw DB::Exception("KeeperStorage already finalized", ErrorCodes::LOGICAL_ERROR); finalized = true; @@ -1689,6 +1715,7 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index ee75f2a0860..bd0d329ef8d 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -1956,6 +1957,84 @@ TEST_P(CoordinationTest, TestUncommittedStateBasicCrud) ASSERT_FALSE(get_committed_data()); } +TEST_P(CoordinationTest, TestListRequestTypes) +{ + using namespace DB; + using namespace Coordination; + + KeeperStorage storage{500, "", true}; + + int64_t zxid = 0; + + static constexpr std::string_view path = "/test"; + + const auto create_path = [&](bool is_ephemeral) + { + const auto create_request = std::make_shared(); + int new_zxid = ++zxid; + create_request->path = path; + create_request->is_sequential = true; + create_request->is_ephemeral = is_ephemeral; + storage.preprocessRequest(create_request, 1, 0, new_zxid); + auto responses = storage.processRequest(create_request, 1, new_zxid); + + EXPECT_GE(responses.size(), 1); + const auto & create_response = dynamic_cast(*responses[0].response); + return create_response.path_created; + }; + + static constexpr size_t persistent_num = 5; + std::unordered_set expected_persistent_children; + for (size_t i = 0; i < persistent_num; ++i) + { + expected_persistent_children.insert(getBaseName(create_path(false)).toString()); + } + ASSERT_EQ(expected_persistent_children.size(), persistent_num); + + static constexpr size_t ephemeral_num = 5; + std::unordered_set expected_ephemeral_children; + for (size_t i = 0; i < ephemeral_num; ++i) + { + expected_ephemeral_children.insert(getBaseName(create_path(true)).toString()); + } + ASSERT_EQ(expected_ephemeral_children.size(), 
ephemeral_num); + + const auto get_children = [&](const auto list_request_type) + { + const auto list_request = std::make_shared(); + int new_zxid = ++zxid; + list_request->path = parentPath(StringRef{path}).toString(); + list_request->list_request_type = list_request_type; + storage.preprocessRequest(list_request, 1, 0, new_zxid); + auto responses = storage.processRequest(list_request, 1, new_zxid); + + EXPECT_GE(responses.size(), 1); + const auto & list_response = dynamic_cast(*responses[0].response); + return list_response.names; + }; + + const auto persistent_children = get_children(ListRequestType::PERSISTENT_ONLY); + EXPECT_EQ(persistent_children.size(), persistent_num); + for (const auto & child : persistent_children) + { + EXPECT_TRUE(expected_persistent_children.contains(child)) << "Missing persistent child " << child; + } + + const auto ephemeral_children = get_children(ListRequestType::EPHEMERAL_ONLY); + EXPECT_EQ(ephemeral_children.size(), ephemeral_num); + for (const auto & child : ephemeral_children) + { + EXPECT_TRUE(expected_ephemeral_children.contains(child)) << "Missing ephemeral child " << child; + } + + const auto all_children = get_children(ListRequestType::ALL); + EXPECT_EQ(all_children.size(), ephemeral_num + persistent_num); + for (const auto & child : all_children) + { + EXPECT_TRUE(expected_ephemeral_children.contains(child) || expected_persistent_children.contains(child)) << "Missing child " << child; + } +} + INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, CoordinationTest, ::testing::ValuesIn(std::initializer_list{ diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 2f37d2ea951..3a4b66e6266 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -559,4 +559,31 @@ String toString(const Field & x) x); } +String fieldTypeToString(Field::Types::Which type) +{ + switch (type) + { + case Field::Types::Which::Null: return "Null"; + case Field::Types::Which::Array: return "Array"; + case Field::Types::Which::Tuple: return "Tuple"; + case Field::Types::Which::Map: return "Map"; + case Field::Types::Which::Object: return "Object"; + case Field::Types::Which::AggregateFunctionState: return "AggregateFunctionState"; + case Field::Types::Which::Bool: return "Bool"; + case Field::Types::Which::String: return "String"; + case Field::Types::Which::Decimal32: return "Decimal32"; + case Field::Types::Which::Decimal64: return "Decimal64"; + case Field::Types::Which::Decimal128: return "Decimal128"; + case Field::Types::Which::Decimal256: return "Decimal256"; + case Field::Types::Which::Float64: return "Float64"; + case Field::Types::Which::Int64: return "Int64"; + case Field::Types::Which::Int128: return "Int128"; + case Field::Types::Which::Int256: return "Int256"; + case Field::Types::Which::UInt64: return "UInt64"; + case Field::Types::Which::UInt128: return "UInt128"; + case Field::Types::Which::UInt256: return "UInt256"; + case Field::Types::Which::UUID: return "UUID"; + } +} + } diff --git a/src/Core/Field.h b/src/Core/Field.h index 4948ec4ae61..08274876914 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -1011,6 +1011,8 @@ void writeFieldText(const Field & x, WriteBuffer & buf); String toString(const Field & x); +String fieldTypeToString(Field::Types::Which type); + } template <> diff --git a/src/Core/PostgreSQL/Connection.cpp b/src/Core/PostgreSQL/Connection.cpp index dfdf14d3506..5a589a80d02 100644 --- a/src/Core/PostgreSQL/Connection.cpp +++ b/src/Core/PostgreSQL/Connection.cpp @@ -73,6 +73,7 @@ void Connection::connect() if (!connection || 
!connection->is_open()) updateConnection(); } + } #endif diff --git a/src/Core/PostgreSQL/Connection.h b/src/Core/PostgreSQL/Connection.h index 97ce3c152d5..d39659a9953 100644 --- a/src/Core/PostgreSQL/Connection.h +++ b/src/Core/PostgreSQL/Connection.h @@ -32,7 +32,10 @@ struct ConnectionInfo class Connection : private boost::noncopyable { public: - explicit Connection(const ConnectionInfo & connection_info_, bool replication_ = false, size_t num_tries = 3); + explicit Connection( + const ConnectionInfo & connection_info_, + bool replication_ = false, + size_t num_tries = 3); void execWithRetry(const std::function & exec); diff --git a/src/Core/PostgreSQL/ConnectionHolder.h b/src/Core/PostgreSQL/ConnectionHolder.h index 38e321e222c..2fd8717c643 100644 --- a/src/Core/PostgreSQL/ConnectionHolder.h +++ b/src/Core/PostgreSQL/ConnectionHolder.h @@ -20,11 +20,20 @@ class ConnectionHolder { public: - ConnectionHolder(PoolPtr pool_, ConnectionPtr connection_) : pool(pool_), connection(std::move(connection_)) {} + ConnectionHolder(PoolPtr pool_, ConnectionPtr connection_, bool auto_close_) + : pool(pool_) + , connection(std::move(connection_)) + , auto_close(auto_close_) + {} ConnectionHolder(const ConnectionHolder & other) = delete; - ~ConnectionHolder() { pool->returnObject(std::move(connection)); } + ~ConnectionHolder() + { + if (auto_close) + connection.reset(); + pool->returnObject(std::move(connection)); + } pqxx::connection & get() { @@ -39,6 +48,7 @@ public: private: PoolPtr pool; ConnectionPtr connection; + bool auto_close; }; using ConnectionHolderPtr = std::unique_ptr; diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 844c60087e0..1bac17de579 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -5,6 +5,7 @@ #include "Utils.h" #include #include +#include #include #include @@ -20,10 +21,14 @@ namespace postgres { PoolWithFailover::PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, - size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_) - : pool_wait_timeout(pool_wait_timeout_) - , max_tries(max_tries_) + const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + size_t pool_size, + size_t pool_wait_timeout_, + size_t max_tries_, + bool auto_close_connection_) + : pool_wait_timeout(pool_wait_timeout_) + , max_tries(max_tries_) + , auto_close_connection(auto_close_connection_) { LOG_TRACE(&Poco::Logger::get("PostgreSQLConnectionPool"), "PostgreSQL connection pool size: {}, connection wait timeout: {}, max failover tries: {}", pool_size, pool_wait_timeout, max_tries_); @@ -40,10 +45,14 @@ PoolWithFailover::PoolWithFailover( } PoolWithFailover::PoolWithFailover( - const DB::StoragePostgreSQLConfiguration & configuration, - size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_) + const DB::StoragePostgreSQLConfiguration & configuration, + size_t pool_size, + size_t pool_wait_timeout_, + size_t max_tries_, + bool auto_close_connection_) : pool_wait_timeout(pool_wait_timeout_) , max_tries(max_tries_) + , auto_close_connection(auto_close_connection_) { LOG_TRACE(&Poco::Logger::get("PostgreSQLConnectionPool"), "PostgreSQL connection pool size: {}, connection wait timeout: {}, max failover tries: {}", pool_size, pool_wait_timeout, max_tries_); @@ -94,7 +103,9 @@ ConnectionHolderPtr PoolWithFailover::get() catch (const pqxx::broken_connection & pqxx_error) { LOG_ERROR(log, "Connection error: {}", 
pqxx_error.what()); - error_message << "Try " << try_idx + 1 << ". Connection to `" << replica.connection_info.host_port << "` failed: " << pqxx_error.what() << "\n"; + error_message << fmt::format( + "Try {}. Connection to {} failed with error: {}\n", + try_idx + 1, DB::backQuote(replica.connection_info.host_port), pqxx_error.what()); replica.pool->returnObject(std::move(connection)); continue; @@ -105,7 +116,7 @@ ConnectionHolderPtr PoolWithFailover::get() throw; } - auto connection_holder = std::make_unique(replica.pool, std::move(connection)); + auto connection_holder = std::make_unique(replica.pool, std::move(connection), auto_close_connection); /// Move all traversed replicas to the end. if (replicas.size() > 1) diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 4e3a17b5e9c..81c94d92141 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -12,6 +12,10 @@ #include +static constexpr inline auto POSTGRESQL_POOL_DEFAULT_SIZE = 16; +static constexpr inline auto POSTGRESQL_POOL_WAIT_TIMEOUT = 5000; +static constexpr inline auto POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES = 5; + namespace postgres { @@ -21,21 +25,19 @@ class PoolWithFailover using RemoteDescription = std::vector>; public: - static constexpr inline auto POSTGRESQL_POOL_DEFAULT_SIZE = 16; - static constexpr inline auto POSTGRESQL_POOL_WAIT_TIMEOUT = 5000; - static constexpr inline auto POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES = 5; - - explicit PoolWithFailover( + PoolWithFailover( const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, - size_t pool_size = POSTGRESQL_POOL_DEFAULT_SIZE, - size_t pool_wait_timeout = POSTGRESQL_POOL_WAIT_TIMEOUT, - size_t max_tries_ = POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + size_t pool_size, + size_t pool_wait_timeout, + size_t max_tries_, + bool auto_close_connection_); explicit PoolWithFailover( const DB::StoragePostgreSQLConfiguration & configuration, - size_t pool_size = POSTGRESQL_POOL_DEFAULT_SIZE, - size_t pool_wait_timeout = POSTGRESQL_POOL_WAIT_TIMEOUT, - size_t max_tries_ = POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + size_t pool_size, + size_t pool_wait_timeout, + size_t max_tries_, + bool auto_close_connection_); PoolWithFailover(const PoolWithFailover & other) = delete; @@ -58,6 +60,7 @@ private: ReplicasWithPriority replicas_with_priority; size_t pool_wait_timeout; size_t max_tries; + bool auto_close_connection; std::mutex mutex; Poco::Logger * log = &Poco::Logger::get("PostgreSQLConnectionPool"); }; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f1f7bc7f2ab..5597d9076a4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -96,7 +96,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \ M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited. Only has meaning at server startup.", 0) \ M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited. Only has meaning at server startup.", 0) \ - M(Bool, stream_like_engine_allow_direct_select, false, "Allow direct SELECT query for Kafka, RabbitMQ and FileLog engines. 
In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.", 0) \ + M(Bool, stream_like_engine_allow_direct_select, false, "Allow direct SELECT query for Kafka, RabbitMQ, FileLog, Redis Streams and NATS engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.", 0) \ + M(String, stream_like_engine_insert_queue, "", "When stream like engine reads from multiple queues, user will need to select one queue to insert into when writing. Used by Redis Streams and NATS.", 0) \ \ M(Milliseconds, distributed_directory_monitor_sleep_time_ms, 100, "Sleep time for StorageDistributed DirectoryMonitors, in case of any errors delay grows exponentially.", 0) \ M(Milliseconds, distributed_directory_monitor_max_sleep_time_ms, 30000, "Maximum sleep time for StorageDistributed DirectoryMonitors, it limits exponential growth too.", 0) \ @@ -428,6 +429,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \ M(UInt64, postgresql_connection_pool_wait_timeout, 5000, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \ + M(Bool, postgresql_connection_pool_auto_close_connection, false, "Close connection before returning connection to the pool.", 0) \ M(UInt64, glob_expansion_max_elements, 1000, "Maximum number of allowed addresses (For external storages, table functions, etc).", 0) \ M(UInt64, odbc_bridge_connection_pool_size, 16, "Connection pool size for each connection settings string in ODBC bridge.", 0) \ M(Bool, odbc_bridge_use_connection_pooling, true, "Use connection pooling in ODBC bridge. If set to false, a new connection is created every time", 0) \ @@ -606,6 +608,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, throw_if_no_data_to_insert, true, "Enables or disables empty INSERTs, enabled by default", 0) \ M(Bool, compatibility_ignore_auto_increment_in_create_table, false, "Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL", 0) \ M(Bool, multiple_joins_try_to_keep_original_names, false, "Do not add aliases to top level expression list on multiple joins rewrite", 0) \ + M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. 
@@ -762,6 +765,12 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(String, input_format_mysql_dump_table_name, "", "Name of the table in MySQL dump from which to read data", 0) \ M(Bool, input_format_mysql_dump_map_column_names, true, "Match columns from table in MySQL dump and columns from ClickHouse table by names", 0) \ + \ + M(UInt64, output_format_sql_insert_max_batch_size, DEFAULT_BLOCK_SIZE, "The maximum number of rows in one INSERT statement.", 0) \ + M(String, output_format_sql_insert_table_name, "table", "The name of table in the output INSERT query", 0) \ + M(Bool, output_format_sql_insert_include_column_names, true, "Include column names in INSERT query", 0) \ + M(Bool, output_format_sql_insert_use_replace, false, "Use REPLACE statement instead of INSERT", 0) \ + M(Bool, output_format_sql_insert_quote_names, true, "Quote column names with '`' characters", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 3fc2503aaa5..46c77593d4e 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -6,6 +6,7 @@ #cmakedefine01 USE_MYSQL #cmakedefine01 USE_RDKAFKA #cmakedefine01 USE_AMQPCPP +#cmakedefine01 USE_NATSIO #cmakedefine01 USE_EMBEDDED_COMPILER #cmakedefine01 USE_SSL #cmakedefine01 USE_LDAP diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index caedfc55f3d..7ea5dbeda83 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -19,7 +19,6 @@ namespace DB namespace ErrorCodes { extern const int WRONG_DDL_RENAMING_SETTINGS; - extern const int LOGICAL_ERROR; } namespace @@ -31,24 +30,40 @@ namespace { /// CREATE TEMPORARY TABLE String table_name = create.getTable(); - const auto & new_table_name = data.renaming_map.getNewTemporaryTableName(table_name); - if (new_table_name != table_name) - create.setTable(new_table_name); + QualifiedTableName full_table_name{DatabaseCatalog::TEMPORARY_DATABASE, table_name}; + const auto & new_table_name = data.renaming_map.getNewTableName(full_table_name); + if (new_table_name != full_table_name) + { + create.setTable(new_table_name.table); + if (new_table_name.database != DatabaseCatalog::TEMPORARY_DATABASE) + { + create.temporary = false; + create.setDatabase(new_table_name.database); + } + } } else if (create.table) { /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW - QualifiedTableName qualified_name; - qualified_name.table = create.getTable(); - qualified_name.database = create.getDatabase(); + QualifiedTableName full_name; + full_name.table = create.getTable(); + full_name.database = create.getDatabase(); - if (!qualified_name.database.empty() && !qualified_name.table.empty()) + if (!full_name.database.empty() && !full_name.table.empty()) { - auto new_qualified_name = data.renaming_map.getNewTableName(qualified_name); - if (new_qualified_name != qualified_name) + auto new_table_name = data.renaming_map.getNewTableName(full_name); + if (new_table_name != full_name) { - create.setTable(new_qualified_name.table); - create.setDatabase(new_qualified_name.database); + create.setTable(new_table_name.table); + if (new_table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) + { + create.temporary = true; + create.setDatabase(""); + } + else + { + create.setDatabase(new_table_name.database); + } } } } @@ -291,17 +306,10 @@ void DDLRenamingVisitor::visit(ASTPtr ast, const Data & data) bool 
DDLRenamingVisitor::needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } -void renameDatabaseAndTableNameInCreateQuery(const ContextPtr & global_context, const DDLRenamingMap & renaming_map, ASTPtr & ast) -{ - DDLRenamingVisitor::Data data{global_context, renaming_map, ast}; - DDLRenamingVisitor::Visitor{data}.visit(ast); -} - - void DDLRenamingMap::setNewTableName(const QualifiedTableName & old_table_name, const QualifiedTableName & new_table_name) { if (old_table_name.table.empty() || old_table_name.database.empty() || new_table_name.table.empty() || new_table_name.database.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed"); + throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Empty names are not allowed"); auto it = old_to_new_table_names.find(old_table_name); if ((it != old_to_new_table_names.end())) @@ -321,7 +329,7 @@ void DDLRenamingMap::setNewTableName(const QualifiedTableName & old_table_name, void DDLRenamingMap::setNewDatabaseName(const String & old_database_name, const String & new_database_name) { if (old_database_name.empty() || new_database_name.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed"); + throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Empty names are not allowed"); auto it = old_to_new_database_names.find(old_database_name); if ((it != old_to_new_database_names.end())) @@ -351,28 +359,11 @@ QualifiedTableName DDLRenamingMap::getNewTableName(const QualifiedTableName & ol return {getNewDatabaseName(old_table_name.database), old_table_name.table}; } -void DDLRenamingMap::setNewTemporaryTableName(const String & old_table_name, const String & new_table_name) + +void renameDatabaseAndTableNameInCreateQuery(ASTPtr ast, const DDLRenamingMap & renaming_map, const ContextPtr & global_context) { - if (old_table_name.empty() || new_table_name.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed"); - - auto it = old_to_new_temporary_table_names.find(old_table_name); - if ((it != old_to_new_temporary_table_names.end())) - { - if (it->second == new_table_name) - return; - throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Wrong renaming: it's specified that temporary table {} should be renamed to {} and to {} at the same time", - backQuoteIfNeed(old_table_name), backQuoteIfNeed(it->second), backQuoteIfNeed(new_table_name)); - } - old_to_new_temporary_table_names[old_table_name] = new_table_name; -} - -const String & DDLRenamingMap::getNewTemporaryTableName(const String & old_table_name) const -{ - auto it = old_to_new_temporary_table_names.find(old_table_name); - if (it != old_to_new_temporary_table_names.end()) - return it->second; - return old_table_name; + DDLRenamingVisitor::Data data{ast, renaming_map, global_context}; + DDLRenamingVisitor::Visitor{data}.visit(ast); } } diff --git a/src/Databases/DDLRenamingVisitor.h b/src/Databases/DDLRenamingVisitor.h index 9d0f770d105..44146a8ee6b 100644 --- a/src/Databases/DDLRenamingVisitor.h +++ b/src/Databases/DDLRenamingVisitor.h @@ -17,7 +17,7 @@ class DDLRenamingMap; /// Changes names of databases or tables in a create query according to a specified renaming map. /// Does not validate AST, works a best-effort way. 
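The renaming map described above is what lets RESTORE put objects back under different names; a hedged sketch of queries that exercise this path (the 'backups' disk and all object names are hypothetical):

RESTORE TABLE mydb.events AS mydb.events_copy FROM Disk('backups', 'events.zip');
RESTORE DATABASE mydb AS mydb_restored FROM Disk('backups', 'mydb.zip');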
-void renameDatabaseAndTableNameInCreateQuery(const ContextPtr & global_context, const DDLRenamingMap & renaming_map, ASTPtr & ast); +void renameDatabaseAndTableNameInCreateQuery(ASTPtr ast, const DDLRenamingMap & renaming_map, const ContextPtr & global_context); /// Renaming map keeps information about new names of databases or tables. class DDLRenamingMap @@ -25,16 +25,13 @@ class DDLRenamingMap public: void setNewTableName(const QualifiedTableName & old_table_name, const QualifiedTableName & new_table_name); void setNewDatabaseName(const String & old_database_name, const String & new_database_name); - void setNewTemporaryTableName(const String & old_table_name, const String & new_table_name); QualifiedTableName getNewTableName(const QualifiedTableName & old_table_name) const; const String & getNewDatabaseName(const String & old_database_name) const; - const String & getNewTemporaryTableName(const String & old_table_name) const; private: std::unordered_map old_to_new_table_names; std::unordered_map old_to_new_database_names; - std::unordered_map old_to_new_temporary_table_names; }; /// Visits ASTCreateQuery and changes names of databases or tables. @@ -43,9 +40,9 @@ class DDLRenamingVisitor public: struct Data { - ContextPtr global_context; - const DDLRenamingMap & renaming_map; ASTPtr create_query; + const DDLRenamingMap & renaming_map; + ContextPtr global_context; }; using Visitor = InDepthNodeVisitor; diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index af82d382063..df1e58ca852 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -344,9 +344,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String use_table_cache = safeGetLiteralValue(engine_args[5], engine_name); } - auto pool = std::make_shared(configuration, - context->getSettingsRef().postgresql_connection_pool_size, - context->getSettingsRef().postgresql_connection_pool_wait_timeout); + const auto & settings = context->getSettingsRef(); + auto pool = std::make_shared( + configuration, + settings.postgresql_connection_pool_size, + settings.postgresql_connection_pool_wait_timeout, + POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, + settings.postgresql_connection_pool_auto_close_connection); return std::make_shared( context, metadata_path, engine_define, database_name, configuration, pool, use_table_cache); diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 5268252731f..8540c785419 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes { extern const int UNKNOWN_TABLE; extern const int LOGICAL_ERROR; + extern const int INCONSISTENT_METADATA_FOR_BACKUP; } DatabaseMemory::DatabaseMemory(const String & name_, ContextPtr context_) @@ -145,4 +146,42 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies)); } +std::vector> DatabaseMemory::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const +{ + /// We need a special processing for the temporary database. + if (getDatabaseName() != DatabaseCatalog::TEMPORARY_DATABASE) + return DatabaseWithOwnTablesBase::getTablesForBackup(filter, local_context); + + std::vector> res; + + /// `this->tables` for the temporary database doesn't contain real names of tables. 
+ /// That's why we need to call Context::getExternalTables() and then resolve those names using tryResolveStorageID() below. + auto external_tables = local_context->getExternalTables(); + + for (const auto & [table_name, storage] : external_tables) + { + if (!filter(table_name)) + continue; + + auto storage_id = local_context->tryResolveStorageID(StorageID{"", table_name}, Context::ResolveExternal); + if (!storage_id) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't resolve the name of temporary table {}", backQuoteIfNeed(table_name)); + + /// Here `storage_id.table_name` looks like looks like "_tmp_ab9b15a3-fb43-4670-abec-14a0e9eb70f1" + /// it's not the real name of the table. + auto create_table_query = tryGetCreateTableQuery(storage_id.table_name, local_context); + if (!create_table_query) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for temporary table {}", backQuoteIfNeed(table_name)); + + const auto & create = create_table_query->as(); + if (create.getTable() != table_name) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for temporary table {}", backQuoteIfNeed(create.getTable()), backQuoteIfNeed(table_name)); + + storage->adjustCreateQueryForBackup(create_table_query); + res.emplace_back(create_table_query, storage); + } + + return res; +} + } diff --git a/src/Databases/DatabaseMemory.h b/src/Databases/DatabaseMemory.h index eef9f306343..6262543b0c1 100644 --- a/src/Databases/DatabaseMemory.h +++ b/src/Databases/DatabaseMemory.h @@ -50,6 +50,8 @@ public: void alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) override; + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; + private: const String data_path; using NameToASTCreate = std::unordered_map; diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 5a22eeaf570..a7ad632efff 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -44,6 +44,7 @@ namespace ErrorCodes extern const int INCORRECT_QUERY; extern const int ALL_CONNECTION_TRIES_FAILED; extern const int NO_ACTIVE_REPLICAS; + extern const int INCONSISTENT_METADATA_FOR_BACKUP; extern const int CANNOT_RESTORE_TABLE; } @@ -923,7 +924,50 @@ String DatabaseReplicated::readMetadataFile(const String & table_name) const } -void DatabaseReplicated::createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer) +std::vector> +DatabaseReplicated::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr &) const +{ + /// Here we read metadata from ZooKeeper. We could do that by simple call of DatabaseAtomic::getTablesForBackup() however + /// reading from ZooKeeper is better because thus we won't be dependent on how fast the replication queue of this database is. 
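The DatabaseMemory change above makes temporary tables reachable for backups under their user-visible names; a hedged sketch, assuming a 'backups' disk is configured for backups (the table name is hypothetical):

CREATE TEMPORARY TABLE tmp_numbers AS SELECT number FROM numbers(10);
BACKUP TEMPORARY TABLE tmp_numbers TO Disk('backups', 'tmp_numbers.zip');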
+ std::vector> res; + auto zookeeper = getContext()->getZooKeeper(); + auto escaped_table_names = zookeeper->getChildren(zookeeper_path + "/metadata"); + for (const auto & escaped_table_name : escaped_table_names) + { + String table_name = unescapeForFileName(escaped_table_name); + if (!filter(table_name)) + continue; + String zk_metadata; + if (!zookeeper->tryGet(zookeeper_path + "/metadata/" + escaped_table_name, zk_metadata)) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Metadata for table {} was not found in ZooKeeper", table_name); + + ParserCreateQuery parser; + auto create_table_query = parseQuery(parser, zk_metadata, 0, getContext()->getSettingsRef().max_parser_depth); + + auto & create = create_table_query->as(); + create.attach = false; + create.setTable(table_name); + create.setDatabase(getDatabaseName()); + + StoragePtr storage; + if (create.uuid != UUIDHelpers::Nil) + { + storage = DatabaseCatalog::instance().tryGetByUUID(create.uuid).second; + if (storage) + storage->adjustCreateQueryForBackup(create_table_query); + } + res.emplace_back(create_table_query, storage); + } + + return res; +} + + +void DatabaseReplicated::createTableRestoredFromBackup( + const ASTPtr & create_table_query, + ContextMutablePtr local_context, + std::shared_ptr restore_coordination, + UInt64 timeout_ms) { /// Because of the replication multiple nodes can try to restore the same tables again and failed with "Table already exists" /// because of some table could be restored already on other node and then replicated to this node. @@ -931,29 +975,25 @@ void DatabaseReplicated::createTableRestoredFromBackup(const ASTPtr & create_tab /// IRestoreCoordination::acquireCreatingTableInReplicatedDatabase() and then for other nodes this function returns false which means /// this table is already being created by some other node. String table_name = create_table_query->as().getTable(); - if (restorer.getRestoreCoordination()->acquireCreatingTableInReplicatedDatabase(getZooKeeperPath(), table_name)) + if (restore_coordination->acquireCreatingTableInReplicatedDatabase(getZooKeeperPath(), table_name)) { - restorer.executeCreateQuery(create_table_query); + DatabaseAtomic::createTableRestoredFromBackup(create_table_query, local_context, restore_coordination, timeout_ms); } /// Wait until the table is actually created no matter if it's created by the current or another node and replicated to the /// current node afterwards. We have to wait because `RestorerFromBackup` is going to restore data of the table then. /// TODO: The following code doesn't look very reliable, probably we need to rewrite it somehow. 
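For Replicated databases the list of tables to back up is now read from ZooKeeper, and on restore each replica waits (with the timeout handled below) until the table appears locally; a hedged sketch of the user-level round trip (names are hypothetical):

BACKUP DATABASE replicated_db TO Disk('backups', 'replicated_db.zip');
RESTORE DATABASE replicated_db FROM Disk('backups', 'replicated_db.zip');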
- auto timeout = restorer.getTimeout(); - bool use_timeout = (timeout.count() >= 0); + auto timeout = std::chrono::milliseconds{timeout_ms}; auto start_time = std::chrono::steady_clock::now(); - while (!isTableExist(table_name, restorer.getContext())) + while (!isTableExist(table_name, local_context)) { waitForReplicaToProcessAllEntries(50); - if (use_timeout) - { - auto elapsed = std::chrono::steady_clock::now() - start_time; - if (elapsed > timeout) - throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, - "Couldn't restore table {}.{} on other node or sync it (elapsed {})", - backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(table_name), to_string(elapsed)); - } + auto elapsed = std::chrono::steady_clock::now() - start_time; + if (elapsed > timeout) + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, + "Couldn't restore table {}.{} on other node or sync it (elapsed {})", + backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(table_name), to_string(elapsed)); } } diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 3aa2aa378b7..07014702067 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -72,7 +72,8 @@ public: void shutdown() override; - void createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer) override; + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; + void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; friend struct DatabaseReplicatedTask; friend class DatabaseReplicatedDDLWorker; diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 5dd17789e60..93a9523d115 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int INCONSISTENT_METADATA_FOR_BACKUP; } void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata) @@ -322,22 +323,34 @@ StoragePtr DatabaseWithOwnTablesBase::getTableUnlocked(const String & table_name backQuote(database_name), backQuote(table_name)); } -DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIteratorForBackup(const BackupEntriesCollector & backup_entries_collector) const +std::vector> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const { - /// Backup all the tables in this database. - /// Here we skip inner tables of materialized views. 
- auto skip_internal_tables = [](const String & table_name) { return !table_name.starts_with(".inner_id."); }; - return getTablesIterator(backup_entries_collector.getContext(), skip_internal_tables); + std::vector> res; + + for (auto it = getTablesIterator(local_context, filter); it->isValid(); it->next()) + { + auto create_table_query = tryGetCreateTableQuery(it->name(), local_context); + if (!create_table_query) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for table {}.{}", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(it->name())); + + const auto & create = create_table_query->as(); + if (create.getTable() != it->name()) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for table {}.{}", backQuoteIfNeed(create.getTable()), backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(it->name())); + + auto storage = it->table(); + storage->adjustCreateQueryForBackup(create_table_query); + res.emplace_back(create_table_query, storage); + } + + return res; } -void DatabaseWithOwnTablesBase::checkCreateTableQueryForBackup(const ASTPtr &, const BackupEntriesCollector &) const -{ -} - -void DatabaseWithOwnTablesBase::createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer) +void DatabaseWithOwnTablesBase::createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr, UInt64) { /// Creates a table by executing a "CREATE TABLE" query. - restorer.executeCreateQuery(create_table_query); + InterpreterCreateQuery interpreter{create_table_query, local_context}; + interpreter.setInternal(true); + interpreter.execute(); } } diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index c960d295529..c5842d7dac3 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -36,9 +36,8 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; - DatabaseTablesIteratorPtr getTablesIteratorForBackup(const BackupEntriesCollector & backup_entries_collector) const override; - void checkCreateTableQueryForBackup(const ASTPtr & create_table_query, const BackupEntriesCollector & backup_entries_collector) const override; - void createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer) override; + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; + void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; void shutdown() override; diff --git a/src/Databases/IDatabase.cpp b/src/Databases/IDatabase.cpp index 3adba0d85c8..9e33548b0dd 100644 --- a/src/Databases/IDatabase.cpp +++ b/src/Databases/IDatabase.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -22,33 +21,15 @@ StoragePtr IDatabase::getTable(const String & name, ContextPtr context) const throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name)); } -ASTPtr IDatabase::getCreateDatabaseQueryForBackup() const +std::vector> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const { - auto query = getCreateDatabaseQuery(); - - /// We don't want to see any UUIDs in backup (after RESTORE the table will 
have another UUID anyway). - auto & create = query->as(); - create.uuid = UUIDHelpers::Nil; - - return query; -} - -DatabaseTablesIteratorPtr IDatabase::getTablesIteratorForBackup(const BackupEntriesCollector &) const -{ - /// IDatabase doesn't own any tables. - return std::make_unique(Tables{}, getDatabaseName()); -} - -void IDatabase::checkCreateTableQueryForBackup(const ASTPtr & create_table_query, const BackupEntriesCollector &) const -{ - /// Cannot restore any table because IDatabase doesn't own any tables. + /// Cannot backup any table because IDatabase doesn't own any tables. throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE, - "Database engine {} does not support backups, cannot backup table {}.{}", - getEngineName(), backQuoteIfNeed(getDatabaseName()), - backQuoteIfNeed(create_table_query->as().getTable())); + "Database engine {} does not support backups, cannot backup tables in database {}", + getEngineName(), backQuoteIfNeed(getDatabaseName())); } -void IDatabase::createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup &) +void IDatabase::createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr, std::shared_ptr, UInt64) { /// Cannot restore any table because IDatabase doesn't own any tables. throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 2223d657f7f..72155bc818c 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -30,8 +30,7 @@ class SettingsChanges; using DictionariesWithID = std::vector>; struct ParsedTablesMetadata; struct QualifiedTableName; -class BackupEntriesCollector; -class RestorerFromBackup; +class IRestoreCoordination; namespace ErrorCodes { @@ -333,17 +332,11 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not run a replication thread!", getEngineName()); } - /// Returns a slightly changed version of the CREATE DATABASE query which must be written to a backup. - virtual ASTPtr getCreateDatabaseQueryForBackup() const; - - /// Returns an iterator that passes through all the tables when an user wants to backup the whole database. - virtual DatabaseTablesIteratorPtr getTablesIteratorForBackup(const BackupEntriesCollector & restorer) const; - - /// Checks a CREATE TABLE query before it will be written to a backup. Called by IStorage::getCreateQueryForBackup(). - virtual void checkCreateTableQueryForBackup(const ASTPtr & create_table_query, const BackupEntriesCollector & backup_entries_collector) const; + /// Returns CREATE TABLE queries and corresponding tables prepared for writing to a backup. + virtual std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context) const; /// Creates a table restored from backup. 
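For ordinary database engines the backup now pairs each table's CREATE query with its storage, and restore simply executes those CREATE queries; a hedged sketch of the corresponding user-level commands (names are hypothetical):

BACKUP TABLE mydb.hits TO Disk('backups', 'hits.zip');
RESTORE TABLE mydb.hits FROM Disk('backups', 'hits.zip');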
- virtual void createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer); + virtual void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr context, std::shared_ptr restore_coordination, UInt64 timeout_ms); virtual ~IDatabase() = default; diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index eb1a4caf2fc..42884278e7d 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -191,10 +191,13 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) const auto settings_config_prefix = config_prefix + ".postgresql"; auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key) || key.starts_with("replica"); }; auto configuration = getExternalDataSourceConfigurationByPriority(config, settings_config_prefix, context, has_config_key); + const auto & settings = context->getSettingsRef(); auto pool = std::make_shared( - configuration.replicas_configurations, - context->getSettingsRef().postgresql_connection_pool_size, - context->getSettingsRef().postgresql_connection_pool_wait_timeout); + configuration.replicas_configurations, + settings.postgresql_connection_pool_size, + settings.postgresql_connection_pool_wait_timeout, + POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, + settings.postgresql_connection_pool_auto_close_connection); PostgreSQLDictionarySource::Configuration dictionary_configuration { diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index 8390ca589e5..44248cae841 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -8,6 +8,11 @@ DiskDecorator::DiskDecorator(const DiskPtr & delegate_) : delegate(delegate_) { } +DiskTransactionPtr DiskDecorator::createTransaction() +{ + return delegate->createTransaction(); +} + const String & DiskDecorator::getName() const { return delegate->getName(); diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index e17a5aff3c7..dfb1f654dfd 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -12,6 +12,8 @@ class DiskDecorator : public IDisk { public: explicit DiskDecorator(const DiskPtr & delegate_); + + DiskTransactionPtr createTransaction() override; const String & getName() const override; ReservationPtr reserve(UInt64 bytes) override; ~DiskDecorator() override = default; diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 4b6fca7ab2a..1a714395f82 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -238,6 +239,13 @@ public: SyncGuardPtr getDirectorySyncGuard(const String & path) const override; + DiskTransactionPtr createTransaction() override + { + /// Need to overwrite explicetly because this disk change + /// a lot of "delegate" methods. + return std::make_shared(*this); + } + private: String wrappedPath(const String & path) const { diff --git a/src/Disks/IDiskTransaction.h b/src/Disks/IDiskTransaction.h index e7b1cf3f675..4b00a9bcefc 100644 --- a/src/Disks/IDiskTransaction.h +++ b/src/Disks/IDiskTransaction.h @@ -108,7 +108,6 @@ public: /// Create hardlink from `src_path` to `dst_path`. 
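The PostgreSQL connection pools above now take the new postgresql_connection_pool_auto_close_connection setting into account; a hedged sketch (the connection parameters are placeholders, and a pool only picks the value up when it is created):

SET postgresql_connection_pool_auto_close_connection = 1;
SELECT count() FROM postgresql('host:5432', 'db', 'table', 'user', 'password');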
virtual void createHardLink(const std::string & src_path, const std::string & dst_path) = 0; - }; using DiskTransactionPtr = std::shared_ptr; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 540672d9b0a..a354ad27049 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include namespace DB @@ -78,8 +79,12 @@ private: } - DiskTransactionPtr DiskObjectStorage::createTransaction() +{ + return std::make_shared(*this); +} + +DiskTransactionPtr DiskObjectStorage::createObjectStorageTransaction() { return std::make_shared( *object_storage, @@ -176,7 +181,7 @@ bool DiskObjectStorage::isFile(const String & path) const void DiskObjectStorage::createFile(const String & path) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->createFile(path); transaction->commit(); } @@ -201,7 +206,7 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat metadata_helper->createFileOperationObject("rename", revision, object_metadata); } - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->moveFile(from_path, to_path); transaction->commit(); } @@ -215,7 +220,7 @@ void DiskObjectStorage::replaceFile(const String & from_path, const String & to_ { if (exists(to_path)) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->replaceFile(from_path, to_path); transaction->commit(); } @@ -225,7 +230,7 @@ void DiskObjectStorage::replaceFile(const String & from_path, const String & to_ void DiskObjectStorage::removeSharedFile(const String & path, bool delete_metadata_only) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->removeSharedFile(path, delete_metadata_only); transaction->commit(); } @@ -276,7 +281,7 @@ void DiskObjectStorage::createHardLink(const String & src_path, const String & d metadata_helper->createFileOperationObject("hardlink", revision, object_metadata); } - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->createHardLink(src_path, dst_path); transaction->commit(); } @@ -291,7 +296,7 @@ void DiskObjectStorage::setReadOnly(const String & path) { /// We should store read only flag inside metadata file (instead of using FS flag), /// because we modify metadata file when create hard-links from it. 
- auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->setReadOnly(path); transaction->commit(); } @@ -305,7 +310,7 @@ bool DiskObjectStorage::isDirectory(const String & path) const void DiskObjectStorage::createDirectory(const String & path) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->createDirectory(path); transaction->commit(); } @@ -313,7 +318,7 @@ void DiskObjectStorage::createDirectory(const String & path) void DiskObjectStorage::createDirectories(const String & path) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->createDirectories(path); transaction->commit(); } @@ -321,7 +326,7 @@ void DiskObjectStorage::createDirectories(const String & path) void DiskObjectStorage::clearDirectory(const String & path) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->clearDirectory(path); transaction->commit(); } @@ -329,7 +334,7 @@ void DiskObjectStorage::clearDirectory(const String & path) void DiskObjectStorage::removeDirectory(const String & path) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->removeDirectory(path); transaction->commit(); } @@ -350,7 +355,7 @@ void DiskObjectStorage::listFiles(const String & path, std::vector & fil void DiskObjectStorage::setLastModified(const String & path, const Poco::Timestamp & timestamp) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->setLastModified(path, timestamp); transaction->commit(); } @@ -394,14 +399,14 @@ ReservationPtr DiskObjectStorage::reserve(UInt64 bytes) void DiskObjectStorage::removeSharedFileIfExists(const String & path, bool delete_metadata_only) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->removeSharedFileIfExists(path, delete_metadata_only); transaction->commit(); } void DiskObjectStorage::removeSharedRecursive(const String & path, bool keep_all_batch_data, const NameSet & file_names_remove_metadata_only) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); transaction->removeSharedRecursive(path, keep_all_batch_data, file_names_remove_metadata_only); transaction->commit(); } @@ -451,7 +456,7 @@ std::unique_ptr DiskObjectStorage::writeFile( WriteMode mode, const WriteSettings & settings) { - auto transaction = createTransaction(); + auto transaction = createObjectStorageTransaction(); auto result = transaction->writeFile(path, buf_size, mode, settings); return result; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index ef29dc8f071..56a1f7b7a3f 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -37,6 +37,7 @@ public: bool send_metadata_, uint64_t thread_pool_size); + /// Create fake transaction DiskTransactionPtr createTransaction() override; DiskType getType() const override { return disk_type; } @@ -163,6 +164,11 @@ public: UInt64 getRevision() const override; private: + + /// Create actual disk object storage transaction for operations + /// execution. 
+ DiskTransactionPtr createObjectStorageTransaction(); + const String name; const String remote_fs_root_path; Poco::Logger * log; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 1d16012437a..224174ee5a9 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -277,6 +278,8 @@ struct WriteFileObjectStorageOperation final : public IDiskObjectStorageOperatio { std::string path; std::string blob_path; + size_t size; + std::function on_execute; WriteFileObjectStorageOperation( IObjectStorage & object_storage_, @@ -288,9 +291,15 @@ struct WriteFileObjectStorageOperation final : public IDiskObjectStorageOperatio , blob_path(blob_path_) {} - void execute(MetadataTransactionPtr) override + void setOnExecute(std::function && on_execute_) { + on_execute = on_execute_; + } + void execute(MetadataTransactionPtr tx) override + { + if (on_execute) + on_execute(tx); } void undo() override @@ -368,6 +377,7 @@ void DiskObjectStorageTransaction::createDirectory(const std::string & path) void DiskObjectStorageTransaction::createDirectories(const std::string & path) { + LOG_DEBUG(&Poco::Logger::get("DEBUG"), "CREATE DIRECTORIES TRANSACTION FOR PATH {}", path); operations_to_execute.emplace_back( std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) { @@ -499,19 +509,47 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile auto blob_path = fs::path(remote_fs_root_path) / blob_name; + auto write_operation = std::make_unique(object_storage, metadata_storage, path, blob_path); + std::function create_metadata_callback; - auto create_metadata_callback = [tx = shared_from_this(), mode, path, blob_name, autocommit] (size_t count) + if (autocommit) { - if (mode == WriteMode::Rewrite) - tx->metadata_transaction->createMetadataFile(path, blob_name, count); - else - tx->metadata_transaction->addBlobToMetadata(path, blob_name, count); + create_metadata_callback = [tx = shared_from_this(), mode, path, blob_name] (size_t count) + { + if (mode == WriteMode::Rewrite) + tx->metadata_transaction->createMetadataFile(path, blob_name, count); + else + tx->metadata_transaction->addBlobToMetadata(path, blob_name, count); - if (autocommit) tx->metadata_transaction->commit(); - }; + }; + } + else + { + create_metadata_callback = [write_op = write_operation.get(), mode, path, blob_name] (size_t count) + { + /// This callback called in WriteBuffer finalize method -- only there we actually know + /// how many bytes were written. We don't control when this finalize method will be called + /// so here we just modify operation itself, but don't execute anything (and don't modify metadata transaction). + /// Otherwise it's possible to get reorder of operations, like: + /// tx->createDirectory(xxx) -- will add metadata operation in execute + /// buf1 = tx->writeFile(xxx/yyy.bin) + /// buf2 = tx->writeFile(xxx/zzz.bin) + /// ... 
+ /// buf1->finalize() // shouldn't do anything with metadata operations, just memoize what to do + /// tx->commit() + write_op->setOnExecute([mode, path, blob_name, count](MetadataTransactionPtr tx) + { + if (mode == WriteMode::Rewrite) + tx->createMetadataFile(path, blob_name, count); + else + tx->addBlobToMetadata(path, blob_name, count); + }); + }; - operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, path, blob_path)); + } + + operations_to_execute.emplace_back(std::move(write_operation)); /// We always use mode Rewrite because we simulate append using metadata and different files return object_storage.writeObject( @@ -569,7 +607,6 @@ void DiskObjectStorageTransaction::commit() try { operations_to_execute[i]->execute(metadata_transaction); - } catch (Exception & ex) { diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 106ecee4702..21641aca392 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace DB @@ -37,11 +38,12 @@ void registerDiskHDFS(DiskFactory & factory) /// FIXME Cache currently unsupported :( ObjectStoragePtr hdfs_storage = std::make_unique(nullptr, uri, std::move(settings), config); - auto metadata_disk = prepareForLocalMetadata(name, config, config_prefix, context_).second; + auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context_); + auto metadata_storage = std::make_shared(metadata_disk, uri); uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); - return std::make_shared( + DiskPtr disk_result = std::make_shared( name, uri, "DiskHDFS", @@ -50,6 +52,22 @@ void registerDiskHDFS(DiskFactory & factory) DiskType::HDFS, /* send_metadata = */ false, copy_thread_pool_size); + +#ifdef NDEBUG + bool use_cache = true; +#else + /// Current S3 cache implementation lead to allocations in destructor of + /// read buffer. 
+ bool use_cache = false; +#endif + + if (config.getBool(config_prefix + ".cache_enabled", use_cache)) + { + String cache_path = config.getString(config_prefix + ".cache_path", context_->getPath() + "disks/" + name + "/cache/"); + disk_result = wrapWithCache(disk_result, "hdfs-cache", cache_path, metadata_path); + } + + return std::make_shared(disk_result); }; factory.registerDiskType("hdfs", creator); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index dc6344137d2..756b33d3eb2 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -158,6 +158,11 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names; + format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size; + format_settings.sql_insert.include_column_names = settings.output_format_sql_insert_include_column_names; + format_settings.sql_insert.table_name = settings.output_format_sql_insert_table_name; + format_settings.sql_insert.use_replace = settings.output_format_sql_insert_use_replace; + format_settings.sql_insert.quote_names = settings.output_format_sql_insert_quote_names; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 0032aea57e4..70bf8979383 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -274,6 +275,15 @@ struct FormatSettings String table_name; bool map_column_names = true; } mysql_dump; + + struct + { + UInt64 max_batch_size = DEFAULT_BLOCK_SIZE; + String table_name = "table"; + bool include_column_names = true; + bool use_replace = false; + bool quote_names = true; + } sql_insert; }; } diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 8493c84173d..0953572fab9 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -82,6 +82,7 @@ void registerOutputFormatMySQLWire(FormatFactory & factory); void registerOutputFormatMarkdown(FormatFactory & factory); void registerOutputFormatPostgreSQLWire(FormatFactory & factory); void registerOutputFormatPrometheus(FormatFactory & factory); +void registerOutputFormatSQLInsert(FormatFactory & factory); /// Input only formats. @@ -205,6 +206,7 @@ void registerFormats() registerOutputFormatPostgreSQLWire(factory); registerOutputFormatCapnProto(factory); registerOutputFormatPrometheus(factory); + registerOutputFormatSQLInsert(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); diff --git a/src/Functions/PositionImpl.h b/src/Functions/PositionImpl.h index 5380fcc36d9..76f10373a58 100644 --- a/src/Functions/PositionImpl.h +++ b/src/Functions/PositionImpl.h @@ -26,7 +26,7 @@ struct PositionCaseSensitiveASCII using MultiSearcherInBigHaystack = MultiVolnitsky; /// For searching single substring, that is different each time. This object is created for each row of data. It must have cheap initialization. 
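The new output format registered above (SQLInsert) and its settings can be tried roughly like this; the produced statements are only sketched in the trailing comment:

SELECT number AS id, toString(number) AS value
FROM numbers(4)
SETTINGS output_format_sql_insert_table_name = 'dst', output_format_sql_insert_max_batch_size = 2
FORMAT SQLInsert

-- With the remaining settings left at their defaults the output has roughly the form:
-- INSERT INTO `dst` (`id`, `value`) VALUES (0, '0'), (1, '1');
-- INSERT INTO `dst` (`id`, `value`) VALUES (2, '2'), (3, '3');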
- using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher; + using SearcherInSmallHaystack = StdLibASCIIStringSearcher; static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) { @@ -62,7 +62,7 @@ struct PositionCaseInsensitiveASCII /// `Volnitsky` is not used here, because one person has measured that this is better. It will be good if you question it. using SearcherInBigHaystack = ASCIICaseInsensitiveStringSearcher; using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitive; - using SearcherInSmallHaystack = LibCASCIICaseInsensitiveStringSearcher; + using SearcherInSmallHaystack = StdLibASCIIStringSearcher; static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t /*haystack_size_hint*/) { @@ -94,7 +94,7 @@ struct PositionCaseSensitiveUTF8 { using SearcherInBigHaystack = VolnitskyUTF8; using MultiSearcherInBigHaystack = MultiVolnitskyUTF8; - using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher; + using SearcherInSmallHaystack = StdLibASCIIStringSearcher; static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) { diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 3f7900b6c62..c3652e10644 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -38,6 +38,12 @@ struct L1Distance state.sum += fabs(x - y); } + template + static void combine(State & state, const State & other_state, const ConstParams &) + { + state.sum += other_state.sum; + } + template static ResultType finalize(const State & state, const ConstParams &) { @@ -63,6 +69,12 @@ struct L2Distance state.sum += (x - y) * (x - y); } + template + static void combine(State & state, const State & other_state, const ConstParams &) + { + state.sum += other_state.sum; + } + template static ResultType finalize(const State & state, const ConstParams &) { @@ -70,6 +82,17 @@ struct L2Distance } }; +struct L2SquaredDistance : L2Distance +{ + static inline String name = "L2Squared"; + + template + static ResultType finalize(const State & state, const ConstParams &) + { + return state.sum; + } +}; + struct LpDistance { static inline String name = "Lp"; @@ -92,6 +115,12 @@ struct LpDistance state.sum += std::pow(fabs(x - y), params.power); } + template + static void combine(State & state, const State & other_state, const ConstParams &) + { + state.sum += other_state.sum; + } + template static ResultType finalize(const State & state, const ConstParams & params) { @@ -117,6 +146,12 @@ struct LinfDistance state.dist = fmax(state.dist, fabs(x - y)); } + template + static void combine(State & state, const State & other_state, const ConstParams &) + { + state.dist = fmax(state.dist, other_state.dist); + } + template static ResultType finalize(const State & state, const ConstParams &) { @@ -146,6 +181,14 @@ struct CosineDistance state.y_squared += y * y; } + template + static void combine(State & state, const State & other_state, const ConstParams &) + { + state.dot_prod += other_state.dot_prod; + state.x_squared += other_state.x_squared; + state.y_squared += other_state.y_squared; + } + template static ResultType finalize(const State & state, const ConstParams &) { @@ -185,25 +228,29 @@ public: case TypeIndex::Int8: case TypeIndex::Int16: case TypeIndex::Int32: - case TypeIndex::Float32: case TypeIndex::UInt64: case TypeIndex::Int64: case 
TypeIndex::Float64: return std::make_shared(); + case TypeIndex::Float32: + return std::make_shared(); default: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments of function {} has nested type {}. " "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", - getName(), common_type->getName()); + getName(), + common_type->getName()); } } - ColumnPtr - executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { switch (result_type->getTypeId()) { + case TypeIndex::Float32: + return executeWithResultType(arguments, input_rows_count); + break; case TypeIndex::Float64: return executeWithResultType(arguments, input_rows_count); break; @@ -248,7 +295,8 @@ private: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments of function {} has nested type {}. " "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", - getName(), type_x->getName()); + getName(), + type_x->getName()); } } @@ -273,7 +321,8 @@ private: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments of function {} has nested type {}. " "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", - getName(), type_y->getName()); + getName(), + type_y->getName()); } } @@ -310,7 +359,9 @@ private: throw Exception( ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arguments of function {} have different array sizes: {} and {}", - getName(), offsets_x[row] - prev_offset, offsets_y[row] - prev_offset); + getName(), + offsets_x[row] - prev_offset, + offsets_y[row] - prev_offset); } } @@ -324,10 +375,23 @@ private: size_t row = 0; for (auto off : offsets_x) { - typename Kernel::template State state; + /// Process chunks in vectorized manner + static constexpr size_t VEC_SIZE = 4; + typename Kernel::template State states[VEC_SIZE]; + for (; prev + VEC_SIZE < off; prev += VEC_SIZE) + { + for (size_t s = 0; s < VEC_SIZE; ++s) + Kernel::template accumulate(states[s], data_x[prev+s], data_y[prev+s], kernel_params); + } + + typename Kernel::template State state; + for (const auto & other_state : states) + Kernel::template combine(state, other_state, kernel_params); + + /// Process the tail for (; prev < off; ++prev) { - Kernel::template accumulate(state, data_x[prev], data_y[prev], kernel_params); + Kernel::template accumulate(state, data_x[prev], data_y[prev], kernel_params); } result_data[row] = Kernel::finalize(state, kernel_params); row++; @@ -360,7 +424,9 @@ private: throw Exception( ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arguments of function {} have different array sizes: {} and {}", - getName(), offsets_x[0], offsets_y[row] - prev_offset); + getName(), + offsets_x[0], + offsets_y[row] - prev_offset); } prev_offset = offsets_y[row]; } @@ -375,10 +441,24 @@ private: size_t row = 0; for (auto off : offsets_y) { - typename Kernel::template State state; - for (size_t i = 0; prev < off; ++i, ++prev) + /// Process chunks in vectorized manner + static constexpr size_t VEC_SIZE = 4; + typename Kernel::template State states[VEC_SIZE]; + size_t i = 0; + for (; prev + VEC_SIZE < off; i += VEC_SIZE, prev += VEC_SIZE) { - Kernel::template accumulate(state, data_x[i], data_y[prev], kernel_params); + for (size_t s = 0; s < VEC_SIZE; ++s) + Kernel::template accumulate(states[s], data_x[i+s], data_y[prev+s], kernel_params); + } + + typename Kernel::template State 
state; + for (const auto & other_state : states) + Kernel::template combine(state, other_state, kernel_params); + + /// Process the tail + for (; prev < off; ++i, ++prev) + { + Kernel::template accumulate(state, data_x[i], data_y[prev], kernel_params); } result_data[row] = Kernel::finalize(state, kernel_params); row++; @@ -430,8 +510,8 @@ LpDistance::ConstParams FunctionArrayDistance::initConstParams(const /// These functions are used by TupleOrArrayFunction FunctionPtr createFunctionArrayL1Distance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayL2Distance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } +FunctionPtr createFunctionArrayL2SquaredDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayLpDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayLinfDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } FunctionPtr createFunctionArrayCosineDistance(ContextPtr context_) { return FunctionArrayDistance::create(context_); } - } diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index 2142abc4c90..e1e7935fcb1 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -31,6 +31,12 @@ struct L1Norm return result + fabs(value); } + template + inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + { + return result + other_result; + } + template inline static ResultType finalize(ResultType result, const ConstParams &) { @@ -50,6 +56,12 @@ struct L2Norm return result + value * value; } + template + inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + { + return result + other_result; + } + template inline static ResultType finalize(ResultType result, const ConstParams &) { @@ -57,6 +69,17 @@ struct L2Norm } }; +struct L2SquaredNorm : L2Norm +{ + static inline String name = "L2Squared"; + + template + inline static ResultType finalize(ResultType result, const ConstParams &) + { + return result; + } +}; + struct LpNorm { @@ -74,6 +97,12 @@ struct LpNorm return result + std::pow(fabs(value), params.power); } + template + inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + { + return result + other_result; + } + template inline static ResultType finalize(ResultType result, const ConstParams & params) { @@ -93,6 +122,12 @@ struct LinfNorm return fmax(result, fabs(value)); } + template + inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + { + return fmax(result, other_result); + } + template inline static ResultType finalize(ResultType result, const ConstParams &) { @@ -127,11 +162,12 @@ public: case TypeIndex::Int8: case TypeIndex::Int16: case TypeIndex::Int32: - case TypeIndex::Float32: case TypeIndex::UInt64: case TypeIndex::Int64: case TypeIndex::Float64: return std::make_shared(); + case TypeIndex::Float32: + return std::make_shared(); default: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, @@ -150,6 +186,9 @@ public: switch (result_type->getTypeId()) { + case TypeIndex::Float32: + return executeWithResultType(*arr, type, input_rows_count, arguments); + break; case TypeIndex::Float64: return executeWithResultType(*arr, type, input_rows_count, arguments); break; @@ -210,10 +249,23 @@ private: size_t row = 0; for (auto 
off : offsets) { - Float64 result = 0; + /// Process chunks in vectorized manner + static constexpr size_t VEC_SIZE = 4; + ResultType results[VEC_SIZE] = {0}; + for (; prev + VEC_SIZE < off; prev += VEC_SIZE) + { + for (size_t s = 0; s < VEC_SIZE; ++s) + results[s] = Kernel::template accumulate(results[s], data[prev+s], kernel_params); + } + + ResultType result = 0; + for (const auto & other_state : results) + result = Kernel::template combine(result, other_state, kernel_params); + + /// Process the tail for (; prev < off; ++prev) { - result = Kernel::template accumulate(result, data[prev], kernel_params); + result = Kernel::template accumulate(result, data[prev], kernel_params); } result_data[row] = Kernel::finalize(result, kernel_params); row++; @@ -265,6 +317,7 @@ LpNorm::ConstParams FunctionArrayNorm::initConstParams(const ColumnsWith /// These functions are used by TupleOrArrayFunction FunctionPtr createFunctionArrayL1Norm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } FunctionPtr createFunctionArrayL2Norm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } +FunctionPtr createFunctionArrayL2SquaredNorm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } FunctionPtr createFunctionArrayLpNorm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } FunctionPtr createFunctionArrayLinfNorm(ContextPtr context_) { return FunctionArrayNorm::create(context_); } diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index 411b30040cc..7974a8bbaf4 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -25,6 +25,7 @@ struct DivideName { static constexpr auto name = "divide"; }; struct L1Label { static constexpr auto name = "1"; }; struct L2Label { static constexpr auto name = "2"; }; +struct L2SquaredLabel { static constexpr auto name = "2Squared"; }; struct LinfLabel { static constexpr auto name = "inf"; }; struct LpLabel { static constexpr auto name = "p"; }; @@ -521,10 +522,10 @@ public: using FunctionL1Norm = FunctionLNorm; template <> -class FunctionLNorm : public ITupleFunction +class FunctionLNorm : public ITupleFunction { public: - static constexpr auto name = "L2Norm"; + static constexpr auto name = "L2SquaredNorm"; explicit FunctionLNorm(ContextPtr context_) : ITupleFunction(context_) {} static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } @@ -579,8 +580,7 @@ public: } } - auto sqrt = FunctionFactory::instance().get("sqrt", context); - return sqrt->build({ColumnWithTypeAndName{res_type, {}}})->getResultType(); + return res_type; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override @@ -618,9 +618,56 @@ public: } } + return res.column; + } +}; +using FunctionL2SquaredNorm = FunctionLNorm; + +template <> +class FunctionLNorm : public FunctionL2SquaredNorm +{ +private: + using Base = FunctionL2SquaredNorm; +public: + static constexpr auto name = "L2Norm"; + + explicit FunctionLNorm(ContextPtr context_) : Base(context_) {} + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + + String getName() const override { return name; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const auto * cur_tuple = checkAndGetDataType(arguments[0].type.get()); + + if (!cur_tuple) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 0 of function {} should be tuple, got {}", + 
getName(), arguments[0].type->getName()); + + const auto & cur_types = cur_tuple->getElements(); + size_t tuple_size = cur_types.size(); + if (tuple_size == 0) + return std::make_shared(); + auto sqrt = FunctionFactory::instance().get("sqrt", context); - auto sqrt_elem = sqrt->build({res}); - return sqrt_elem->execute({res}, sqrt_elem->getResultType(), input_rows_count); + return sqrt->build({ColumnWithTypeAndName{Base::getReturnTypeImpl(arguments), {}}})->getResultType(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto cur_elements = getTupleElements(*arguments[0].column); + + size_t tuple_size = cur_elements.size(); + if (tuple_size == 0) + return DataTypeUInt8().createColumnConstWithDefaultValue(input_rows_count); + + ColumnWithTypeAndName squared_res; + squared_res.type = Base::getReturnTypeImpl(arguments); + squared_res.column = Base::executeImpl(arguments, squared_res.type, input_rows_count); + + auto sqrt = FunctionFactory::instance().get("sqrt", context); + auto sqrt_elem = sqrt->build({squared_res}); + return sqrt_elem->execute({squared_res}, sqrt_elem->getResultType(), input_rows_count); } }; using FunctionL2Norm = FunctionLNorm; @@ -930,6 +977,8 @@ using FunctionL1Distance = FunctionLDistance; using FunctionL2Distance = FunctionLDistance; +using FunctionL2SquaredDistance = FunctionLDistance; + using FunctionLinfDistance = FunctionLDistance; using FunctionLpDistance = FunctionLDistance; @@ -1111,11 +1160,13 @@ private: extern FunctionPtr createFunctionArrayL1Norm(ContextPtr context_); extern FunctionPtr createFunctionArrayL2Norm(ContextPtr context_); +extern FunctionPtr createFunctionArrayL2SquaredNorm(ContextPtr context_); extern FunctionPtr createFunctionArrayLpNorm(ContextPtr context_); extern FunctionPtr createFunctionArrayLinfNorm(ContextPtr context_); extern FunctionPtr createFunctionArrayL1Distance(ContextPtr context_); extern FunctionPtr createFunctionArrayL2Distance(ContextPtr context_); +extern FunctionPtr createFunctionArrayL2SquaredDistance(ContextPtr context_); extern FunctionPtr createFunctionArrayLpDistance(ContextPtr context_); extern FunctionPtr createFunctionArrayLinfDistance(ContextPtr context_); extern FunctionPtr createFunctionArrayCosineDistance(ContextPtr context_); @@ -1136,6 +1187,14 @@ struct L2NormTraits static constexpr auto CreateArrayFunction = createFunctionArrayL2Norm; }; +struct L2SquaredNormTraits +{ + static inline String name = "L2SquaredNorm"; + + static constexpr auto CreateTupleFunction = FunctionL2SquaredNorm::create; + static constexpr auto CreateArrayFunction = createFunctionArrayL2SquaredNorm; +}; + struct LpNormTraits { static inline String name = "LpNorm"; @@ -1168,6 +1227,14 @@ struct L2DistanceTraits static constexpr auto CreateArrayFunction = createFunctionArrayL2Distance; }; +struct L2SquaredDistanceTraits +{ + static inline String name = "L2SquaredDistance"; + + static constexpr auto CreateTupleFunction = FunctionL2SquaredDistance::create; + static constexpr auto CreateArrayFunction = createFunctionArrayL2SquaredDistance; +}; + struct LpDistanceTraits { static inline String name = "LpDistance"; @@ -1194,11 +1261,13 @@ struct CosineDistanceTraits using TupleOrArrayFunctionL1Norm = TupleOrArrayFunction; using TupleOrArrayFunctionL2Norm = TupleOrArrayFunction; +using TupleOrArrayFunctionL2SquaredNorm = TupleOrArrayFunction; using TupleOrArrayFunctionLpNorm = TupleOrArrayFunction; using TupleOrArrayFunctionLinfNorm = TupleOrArrayFunction; 
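The user-visible outcome of the distance/norm changes above: Float32 arguments now yield Float32 results, the loops accumulate in small fixed-size chunks so they can be vectorized, and the new L2Squared* functions (with case-insensitive aliases normL2Squared and distanceL2Squared) skip the final sqrt. A hedged sketch:

SELECT
    L2Norm([3, 4]) AS norm,                        -- 5
    L2SquaredNorm([3, 4]) AS norm_sq,              -- 25, the same value before taking the sqrt
    L2SquaredDistance((1, 2), (4, 6)) AS dist_sq,  -- 25, works for tuples as well as arrays
    toTypeName(L2Distance(CAST([3, 4] AS Array(Float32)), CAST([0, 0] AS Array(Float32)))) AS t;  -- Float32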
using TupleOrArrayFunctionL1Distance = TupleOrArrayFunction; using TupleOrArrayFunctionL2Distance = TupleOrArrayFunction; +using TupleOrArrayFunctionL2SquaredDistance = TupleOrArrayFunction; using TupleOrArrayFunctionLpDistance = TupleOrArrayFunction; using TupleOrArrayFunctionLinfDistance = TupleOrArrayFunction; using TupleOrArrayFunctionCosineDistance = TupleOrArrayFunction; @@ -1221,21 +1290,25 @@ void registerVectorFunctions(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerAlias("normL1", TupleOrArrayFunctionL1Norm::name, FunctionFactory::CaseInsensitive); factory.registerAlias("normL2", TupleOrArrayFunctionL2Norm::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("normL2Squared", TupleOrArrayFunctionL2SquaredNorm::name, FunctionFactory::CaseInsensitive); factory.registerAlias("normLinf", TupleOrArrayFunctionLinfNorm::name, FunctionFactory::CaseInsensitive); factory.registerAlias("normLp", FunctionLpNorm::name, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); factory.registerAlias("distanceL1", FunctionL1Distance::name, FunctionFactory::CaseInsensitive); factory.registerAlias("distanceL2", FunctionL2Distance::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("distanceL2Squared", FunctionL2SquaredDistance::name, FunctionFactory::CaseInsensitive); factory.registerAlias("distanceLinf", FunctionLinfDistance::name, FunctionFactory::CaseInsensitive); factory.registerAlias("distanceLp", FunctionLpDistance::name, FunctionFactory::CaseInsensitive); diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index 9881d463ed4..45b98f8c977 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -47,6 +47,7 @@ public: off_t getPosition() override; const ReadBufferFactory & getReadBufferFactory() const { return *reader_factory; } + ReadBufferFactory & getReadBufferFactory() { return *reader_factory; } private: /// Reader in progress with a list of read segments diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 53831e02cb2..cf19b6f1980 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -227,7 +227,7 @@ size_t ReadBufferFromS3::getFileSize() if (file_size) return *file_size; - auto object_size = S3::getObjectSize(client_ptr, bucket, key, version_id, false); + auto object_size = S3::getObjectSize(client_ptr, bucket, key, version_id); file_size = object_size; return *file_size; diff --git a/src/IO/WithFileSize.cpp b/src/IO/WithFileSize.cpp index 28542db7a73..f71690fcdee 100644 --- a/src/IO/WithFileSize.cpp +++ b/src/IO/WithFileSize.cpp @@ -33,6 +33,10 @@ size_t getFileSizeFromReadBuffer(ReadBuffer & in) { return getFileSize(compressed->getWrappedReadBuffer()); } + else if (auto * parallel = dynamic_cast(&in)) + { + return getFileSize(parallel->getReadBufferFactory()); + } return getFileSize(in); } @@ -47,6 +51,10 @@ bool isBufferWithFileSize(const ReadBuffer & in) { return isBufferWithFileSize(compressed->getWrappedReadBuffer()); } + else if (const auto * parallel = dynamic_cast(&in)) + { + return dynamic_cast(&parallel->getReadBufferFactory()) != nullptr; + } return dynamic_cast(&in) != nullptr; } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 511e5c9e031..a99ecee43bf 100644 ---
a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -374,7 +374,15 @@ Block Aggregator::Params::getHeader( if (only_merge) { - res = header.cloneEmpty(); + NameSet needed_columns(keys.begin(), keys.end()); + for (const auto & aggregate : aggregates) + needed_columns.emplace(aggregate.column_name); + + for (const auto & column : header) + { + if (needed_columns.contains(column.name)) + res.insert(column.cloneEmpty()); + } if (final) { diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index a0579b813db..bae2aed2cd5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -234,12 +234,13 @@ void DatabaseCatalog::shutdownImpl() view_dependencies.clear(); } -bool DatabaseCatalog::isPredefinedDatabaseName(const std::string_view & database_name) +bool DatabaseCatalog::isPredefinedDatabase(const std::string_view & database_name) { return database_name == TEMPORARY_DATABASE || database_name == SYSTEM_DATABASE || database_name == INFORMATION_SCHEMA || database_name == INFORMATION_SCHEMA_UPPERCASE; } + DatabaseAndTable DatabaseCatalog::tryGetByUUID(const UUID & uuid) const { assert(uuid != UUIDHelpers::Nil && getFirstLevelIdx(uuid) < uuid_map.size()); @@ -328,6 +329,48 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( return {database, table}; } +bool DatabaseCatalog::isPredefinedTable(const StorageID & table_id) const +{ + static const char * information_schema_views[] = {"schemata", "tables", "views", "columns"}; + static const char * information_schema_views_uppercase[] = {"SCHEMATA", "TABLES", "VIEWS", "COLUMNS"}; + + auto check_database_and_table_name = [&](const String & database_name, const String & table_name) + { + if (database_name == SYSTEM_DATABASE) + { + auto storage = getSystemDatabase()->tryGetTable(table_name, getContext()); + return storage && storage->isSystemStorage(); + } + if (database_name == INFORMATION_SCHEMA) + { + return std::find(std::begin(information_schema_views), std::end(information_schema_views), table_name) + != std::end(information_schema_views); + } + if (database_name == INFORMATION_SCHEMA_UPPERCASE) + { + return std::find(std::begin(information_schema_views_uppercase), std::end(information_schema_views_uppercase), table_name) + != std::end(information_schema_views_uppercase); + } + return false; + }; + + if (table_id.hasUUID()) + { + if (auto storage = tryGetByUUID(table_id.uuid).second) + { + if (storage->isSystemStorage()) + return true; + auto res_id = storage->getStorageID(); + String database_name = res_id.getDatabaseName(); + if (database_name != SYSTEM_DATABASE) /// If (database_name == SYSTEM_DATABASE) then we have already checked it (see isSystemStorage() above). 
+ return check_database_and_table_name(database_name, res_id.getTableName()); + } + return false; + } + + return check_database_and_table_name(table_id.getDatabaseName(), table_id.getTableName()); +} + void DatabaseCatalog::assertDatabaseExists(const String & database_name) const { std::lock_guard lock{databases_mutex}; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 4468cc3a5d8..133cf0c5126 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -130,8 +130,8 @@ public: static constexpr const char * INFORMATION_SCHEMA = "information_schema"; static constexpr const char * INFORMATION_SCHEMA_UPPERCASE = "INFORMATION_SCHEMA"; - /// Returns true if a passed string is one of the predefined databases' names - static bool isPredefinedDatabaseName(const std::string_view & database_name); + /// Returns true if a passed name is one of the predefined databases' names. + static bool isPredefinedDatabase(const std::string_view & database_name); static DatabaseCatalog & init(ContextMutablePtr global_context_); static DatabaseCatalog & instance(); @@ -181,6 +181,11 @@ public: ContextPtr context, std::optional * exception = nullptr) const; + /// Returns true if a passed table_id refers to one of the predefined tables' names. + /// All tables in the "system" database with System* table engine are predefined. + /// Four views (tables, views, columns, schemata) in the "information_schema" database are predefined too. + bool isPredefinedTable(const StorageID & table_id) const; + void addDependency(const StorageID & from, const StorageID & where); void removeDependency(const StorageID & from, const StorageID & where); Dependencies getDependencies(const StorageID & from) const; diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index dbfdba77619..f0658cb7c9b 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -142,6 +142,33 @@ std::optional IdentifierSemantic::extractNestedName(const ASTIdentifier return {}; } +String IdentifierSemantic::extractNestedName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & table) +{ + auto match = IdentifierSemantic::canReferColumnToTable(identifier, table); + size_t to_strip = 0; + switch (match) + { + case IdentifierSemantic::ColumnMatch::TableName: + case IdentifierSemantic::ColumnMatch::AliasedTableName: + case IdentifierSemantic::ColumnMatch::TableAlias: + to_strip = 1; + break; + case IdentifierSemantic::ColumnMatch::DBAndTable: + to_strip = 2; + break; + default: + break; + } + String res; + for (size_t i = to_strip, sz = identifier.name_parts.size(); i < sz; ++i) + { + if (!res.empty()) + res += "."; + res += identifier.name_parts[i]; + } + return res; +} + bool IdentifierSemantic::doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table) { size_t num_components = identifier.name_parts.size(); diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index 5dc828c36ba..c082e83b75c 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -44,6 +44,8 @@ struct IdentifierSemantic /// @returns name for 'not a column' identifiers static std::optional extractNestedName(const ASTIdentifier & identifier, const String & table_name); + static String extractNestedName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & table); + static ColumnMatch 
canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & table_with_columns); diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index ef19eaa1c42..5117b92efdf 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -38,8 +38,7 @@ BlockIO InterpreterCreateIndexQuery::execute() query_ptr->as().setDatabase(table_id.database_name); DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); - if (typeid_cast(database.get()) - && !current_context->getClientInfo().is_replicated_database_internal) + if (typeid_cast(database.get()) && !current_context->getClientInfo().is_replicated_database_internal) { auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); guard->releaseTableLock(); diff --git a/src/Interpreters/InterpreterDropIndexQuery.cpp b/src/Interpreters/InterpreterDropIndexQuery.cpp index 2339e0dc68e..70f35a92688 100644 --- a/src/Interpreters/InterpreterDropIndexQuery.cpp +++ b/src/Interpreters/InterpreterDropIndexQuery.cpp @@ -1,8 +1,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -36,8 +36,7 @@ BlockIO InterpreterDropIndexQuery::execute() query_ptr->as().setDatabase(table_id.database_name); DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); - if (typeid_cast(database.get()) - && !current_context->getClientInfo().is_replicated_database_internal) + if (typeid_cast(database.get()) && !current_context->getClientInfo().is_replicated_database_internal) { auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); guard->releaseTableLock(); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 4ed293e8530..7b6066575ae 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -457,20 +457,26 @@ BlockIO InterpreterInsertQuery::execute() }); size_t num_select_threads = pipeline.getNumThreads(); - size_t num_insert_threads = std::max_element(out_chains.begin(), out_chains.end(), [&](const auto &a, const auto &b) - { - return a.getNumThreads() < b.getNumThreads(); - })->getNumThreads(); for (auto & chain : out_chains) resources = chain.detachResources(); pipeline.addChains(std::move(out_chains)); - pipeline.setMaxThreads(num_insert_threads); - /// Don't use more threads for insert then for select to reduce memory consumption. - if (!settings.parallel_view_processing && pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); + if (!settings.parallel_view_processing) + { + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. 
+ pipeline.setMaxThreads(settings.max_threads); + } pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cf229442442..28438a86e47 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2572,8 +2572,13 @@ void InterpreterSelectQuery::executeDistinct(QueryPlan & query_plan, bool before SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); - auto distinct_step - = std::make_unique(query_plan.getCurrentDataStream(), limits, limit_for_distinct, columns, pre_distinct); + auto distinct_step = std::make_unique( + query_plan.getCurrentDataStream(), + limits, + limit_for_distinct, + columns, + pre_distinct, + settings.optimize_distinct_in_order); if (pre_distinct) distinct_step->setStepDescription("Preliminary DISTINCT"); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index e886027683f..9f87a47fced 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -329,8 +329,13 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan) /// Add distinct transform SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); - auto distinct_step - = std::make_unique(query_plan.getCurrentDataStream(), limits, 0, result_header.getNames(), false); + auto distinct_step = std::make_unique( + query_plan.getCurrentDataStream(), + limits, + 0, + result_header.getNames(), + false, + settings.optimize_distinct_in_order); query_plan.addStep(std::move(distinct_step)); } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 3129f9d7fe2..b58b90b6d47 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -6,6 +7,7 @@ #include #include #include +#include #include #include @@ -29,38 +31,56 @@ namespace ErrorCodes extern const int UNSUPPORTED_JOIN_KEYS; extern const int LOGICAL_ERROR; } +bool TranslateQualifiedNamesMatcher::Data::matchColumnName(const std::string_view & name, const String & column_name, DataTypePtr column_type) +{ + if (name.size() < column_name.size()) + return false; + if (!name.starts_with(column_name)) + return false; + + if (name.size() == column_name.size()) + return true; + + /// In case the type is named tuple, check the name recursively. 
+ if (const DataTypeTuple * type_tuple = typeid_cast(column_type.get())) + { + if (type_tuple->haveExplicitNames() && name.at(column_name.size()) == '.') + { + const Strings & names = type_tuple->getElementNames(); + const DataTypes & element_types = type_tuple->getElements(); + std::string_view sub_name = name.substr(column_name.size() + 1); + for (size_t i = 0; i < names.size(); ++i) + { + if (matchColumnName(sub_name, names[i], element_types[i])) + { + return true; + } + } + } + } + + return false; +} bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const ASTIdentifier & identifier) const { const auto & table = tables[table_pos].table; - auto nested1 = IdentifierSemantic::extractNestedName(identifier, table.table); - auto nested2 = IdentifierSemantic::extractNestedName(identifier, table.alias); - - const String & short_name = identifier.shortName(); const auto & columns = tables[table_pos].columns; + + // Remove database and table name from the identifier'name + auto full_name = IdentifierSemantic::extractNestedName(identifier, table); + for (const auto & column : columns) { - const String & known_name = column.name; - if (short_name == known_name) - return false; - if (nested1 && *nested1 == known_name) - return false; - if (nested2 && *nested2 == known_name) + if (matchColumnName(full_name, column.name, column.type)) return false; } - const auto & hidden_columns = tables[table_pos].hidden_columns; for (const auto & column : hidden_columns) { - const String & known_name = column.name; - if (short_name == known_name) - return false; - if (nested1 && *nested1 == known_name) - return false; - if (nested2 && *nested2 == known_name) + if (matchColumnName(full_name, column.name, column.type)) return false; } - return !columns.empty(); } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 9c46d926eca..e0c2f6b6bc0 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -39,6 +39,7 @@ public: bool hasTable() const { return !tables.empty(); } bool processAsterisks() const { return hasTable() && has_columns; } bool unknownColumn(size_t table_pos, const ASTIdentifier & identifier) const; + static bool matchColumnName(const std::string_view & name, const String & column_name, DataTypePtr column_type); }; static void visit(ASTPtr & ast, Data & data); @@ -53,6 +54,7 @@ private: static void visit(ASTFunction &, const ASTPtr &, Data &); static void extractJoinUsingColumns(ASTPtr ast, Data & data); + }; /// Visits AST for names qualification. diff --git a/src/Interpreters/getClusterName.cpp b/src/Interpreters/getClusterName.cpp index fee10e32d70..d3c53b28cdf 100644 --- a/src/Interpreters/getClusterName.cpp +++ b/src/Interpreters/getClusterName.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -22,7 +23,7 @@ std::string getClusterName(const IAST & node) return ast_id->name(); if (const auto * ast_lit = node.as()) - return ast_lit->value.safeGet(); + return checkAndGetLiteralArgument(*ast_lit, "cluster_name"); /// A hack to support hyphens in cluster names. 
if (const auto * ast_func = node.as()) diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index af0d9064626..ab31d3f9b7a 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -1,10 +1,10 @@ #include #include +#include #include #include #include -#include #include #include #include @@ -86,15 +86,15 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect if (!s_on.ignore(pos, expected)) return false; - if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table)) + if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table)) return false; /// [ON cluster_name] - if (s_on.ignore(pos, expected)) - { + if (s_on.ignore(pos, expected)) + { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; - } + } if (!parser_create_idx_decl.parse(pos, index_decl, expected)) return false; diff --git a/src/Parsers/ParserDropIndexQuery.cpp b/src/Parsers/ParserDropIndexQuery.cpp index 89ed4f01838..5843d8b94bf 100644 --- a/src/Parsers/ParserDropIndexQuery.cpp +++ b/src/Parsers/ParserDropIndexQuery.cpp @@ -38,17 +38,17 @@ bool ParserDropIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected if (!s_on.ignore(pos, expected)) return false; - if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table)) + if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table)) return false; /// [ON cluster_name] - if (s_on.ignore(pos, expected)) - { + if (s_on.ignore(pos, expected)) + { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; query->cluster = std::move(cluster_str); - } + } if (query->index_name) query->children.push_back(query->index_name); diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 3d416544419..86eb8bf36a5 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -31,7 +31,8 @@ add_custom_command( DEPENDS "${CURRENT_DIR_IN_BINARY}/clickhouse.g" ) -PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS "${CURRENT_DIR_IN_BINARY}/out.proto") +set(PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE) +protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS "${CURRENT_DIR_IN_BINARY}/out.proto") set(FUZZER_SRCS codegen_select_fuzzer.cpp "${CURRENT_DIR_IN_BINARY}/out.cpp" ${PROTO_SRCS} ${PROTO_HDRS}) set(CMAKE_INCLUDE_CURRENT_DIR TRUE) diff --git a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp new file mode 100644 index 00000000000..749b4b40984 --- /dev/null +++ b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.cpp @@ -0,0 +1,102 @@ +#include +#include + + +namespace DB +{ + +SQLInsertRowOutputFormat::SQLInsertRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_, params_), column_names(header_.getNames()), format_settings(format_settings_) +{ +} + +void SQLInsertRowOutputFormat::writeRowStartDelimiter() +{ + if (rows_in_line == 0) + printLineStart(); + writeChar('(', out); +} + +void SQLInsertRowOutputFormat::printLineStart() +{ + if (format_settings.sql_insert.use_replace) + writeCString("REPLACE INTO ", out); + else + writeCString("INSERT INTO ", out); + + writeString(format_settings.sql_insert.table_name, out); + + if (format_settings.sql_insert.include_column_names) + 
printColumnNames(); + + writeCString(" VALUES ", out); +} + +void SQLInsertRowOutputFormat::printColumnNames() +{ + writeCString(" (", out); + for (size_t i = 0; i != column_names.size(); ++i) + { + if (format_settings.sql_insert.quote_names) + writeChar('`', out); + + writeString(column_names[i], out); + + if (format_settings.sql_insert.quote_names) + writeChar('`', out); + + if (i + 1 != column_names.size()) + writeCString(", ", out); + } + writeChar(')', out); +} + +void SQLInsertRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) +{ + serialization.serializeTextQuoted(column, row_num, out, format_settings); +} + +void SQLInsertRowOutputFormat::writeFieldDelimiter() +{ + writeCString(", ", out); +} + +void SQLInsertRowOutputFormat::writeRowEndDelimiter() +{ + writeChar(')', out); + ++rows_in_line; +} + +void SQLInsertRowOutputFormat::writeRowBetweenDelimiter() +{ + if (rows_in_line >= format_settings.sql_insert.max_batch_size) + { + writeCString(";\n", out); + rows_in_line = 0; + } + else + { + writeCString(", ", out); + } +} + +void SQLInsertRowOutputFormat::writeSuffix() +{ + writeCString(";\n", out); +} + + +void registerOutputFormatSQLInsert(FormatFactory & factory) +{ + factory.registerOutputFormat("SQLInsert", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams & params, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, params, settings); + }); +} + + +} diff --git a/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h new file mode 100644 index 00000000000..aaaf39a9e4d --- /dev/null +++ b/src/Processors/Formats/Impl/SQLInsertRowOutputFormat.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class WriteBuffer; + +class SQLInsertRowOutputFormat : public IRowOutputFormat +{ +public: + SQLInsertRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSettings & format_settings_); + + String getName() const override { return "SQLInsertRowOutputFormat"; } + + /// https://www.iana.org/assignments/media-types/text/tab-separated-values + String getContentType() const override { return "text/tab-separated-values; charset=UTF-8"; } + +protected: + void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override; + virtual void writeFieldDelimiter() override; + virtual void writeRowStartDelimiter() override; + virtual void writeRowEndDelimiter() override; + virtual void writeRowBetweenDelimiter() override; + virtual void writeSuffix() override; + + void printLineStart(); + void printColumnNames(); + + size_t rows_in_line = 0; + Names column_names; + const FormatSettings format_settings; +}; + +} diff --git a/src/Processors/QueryPlan/DistinctStep.cpp b/src/Processors/QueryPlan/DistinctStep.cpp index 98fbd75dd85..553732fbcc5 100644 --- a/src/Processors/QueryPlan/DistinctStep.cpp +++ b/src/Processors/QueryPlan/DistinctStep.cpp @@ -1,8 +1,11 @@ #include +#include +#include #include #include #include #include +#include namespace DB { @@ -37,13 +40,26 @@ static ITransformingStep::Traits getTraits(bool pre_distinct, bool already_disti }; } +static SortDescription getSortDescription(const SortDescription & input_sort_desc, const Names& columns) +{ + SortDescription distinct_sort_desc; + for (const auto & sort_column_desc : input_sort_desc) + { + if (std::find(begin(columns), end(columns), 
sort_column_desc.column_name) == columns.end()) + break; + distinct_sort_desc.emplace_back(sort_column_desc); + } + return distinct_sort_desc; +} + DistinctStep::DistinctStep( const DataStream & input_stream_, const SizeLimits & set_size_limits_, UInt64 limit_hint_, const Names & columns_, - bool pre_distinct_) + bool pre_distinct_, + bool optimize_distinct_in_order_) : ITransformingStep( input_stream_, input_stream_.header, @@ -52,6 +68,7 @@ DistinctStep::DistinctStep( , limit_hint(limit_hint_) , columns(columns_) , pre_distinct(pre_distinct_) + , optimize_distinct_in_order(optimize_distinct_in_order_) { if (!output_stream->distinct_columns.empty() /// Columns already distinct, do nothing && (!pre_distinct /// Main distinct @@ -65,19 +82,58 @@ DistinctStep::DistinctStep( void DistinctStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - if (checkColumnsAlreadyDistinct(columns, input_streams.front().distinct_columns)) + const auto & input_stream = input_streams.back(); + if (checkColumnsAlreadyDistinct(columns, input_stream.distinct_columns)) return; if (!pre_distinct) pipeline.resize(1); - pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + if (optimize_distinct_in_order) { - if (stream_type != QueryPipelineBuilder::StreamType::Main) - return nullptr; + SortDescription distinct_sort_desc = getSortDescription(input_stream.sort_description, columns); + if (!distinct_sort_desc.empty()) + { + /// pre-distinct for sorted chunks + if (pre_distinct) + { + pipeline.addSimpleTransform( + [&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipelineBuilder::StreamType::Main) + return nullptr; - return std::make_shared(header, set_size_limits, limit_hint, columns); - }); + return std::make_shared( + header, set_size_limits, limit_hint, distinct_sort_desc, columns); + }); + return; + } + /// final distinct for sorted stream (sorting inside and among chunks) + if (input_stream.sort_mode == DataStream::SortMode::Stream) + { + assert(input_stream.has_single_port); + + pipeline.addSimpleTransform( + [&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipelineBuilder::StreamType::Main) + return nullptr; + + return std::make_shared(header, distinct_sort_desc, set_size_limits, limit_hint, columns); + }); + return; + } + } + } + + pipeline.addSimpleTransform( + [&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipelineBuilder::StreamType::Main) + return nullptr; + + return std::make_shared(header, set_size_limits, limit_hint, columns); + }); } void DistinctStep::describeActions(FormatSettings & settings) const diff --git a/src/Processors/QueryPlan/DistinctStep.h b/src/Processors/QueryPlan/DistinctStep.h index a4424e01d72..dc734a58704 100644 --- a/src/Processors/QueryPlan/DistinctStep.h +++ b/src/Processors/QueryPlan/DistinctStep.h @@ -10,11 +10,12 @@ class DistinctStep : public ITransformingStep { public: DistinctStep( - const DataStream & input_stream_, - const SizeLimits & set_size_limits_, - UInt64 limit_hint_, - const Names & columns_, - bool pre_distinct_); /// If is enabled, execute distinct for separate streams. Otherwise, merge streams. 
+ const DataStream & input_stream_, + const SizeLimits & set_size_limits_, + UInt64 limit_hint_, + const Names & columns_, + bool pre_distinct_, /// If is enabled, execute distinct for separate streams. Otherwise, merge streams. + bool optimize_distinct_in_order_); String getName() const override { return "Distinct"; } @@ -30,6 +31,7 @@ private: UInt64 limit_hint; Names columns; bool pre_distinct; + bool optimize_distinct_in_order; }; } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index f377709a899..4a1772759bc 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -71,6 +71,18 @@ static const PrewhereInfoPtr & getPrewhereInfo(const SelectQueryInfo & query_inf : query_info.prewhere_info; } +static int getSortDirection(const SelectQueryInfo & query_info) +{ + const InputOrderInfoPtr & order_info = query_info.input_order_info + ? query_info.input_order_info + : (query_info.projection ? query_info.projection->input_order_info : nullptr); + + if (!order_info) + return 1; + + return order_info->direction; +} + ReadFromMergeTree::ReadFromMergeTree( MergeTreeData::DataPartsVector parts_, Names real_column_names_, @@ -124,6 +136,22 @@ ReadFromMergeTree::ReadFromMergeTree( /// Add explicit description. setStepDescription(data.getStorageID().getFullNameNotQuoted()); + + { /// build sort description for output stream + SortDescription sort_description; + const Names & sorting_key_columns = storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(); + const Block & header = output_stream->header; + const int sort_direction = getSortDirection(query_info); + for (const auto & column_name : sorting_key_columns) + { + if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; }) + == header.end()) + break; + sort_description.emplace_back(column_name, sort_direction); + } + output_stream->sort_description = std::move(sort_description); + output_stream->sort_mode = DataStream::SortMode::Chunk; + } } Pipe ReadFromMergeTree::readFromPool( diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 5a543497ed0..5d5c7e9cb2c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -111,7 +111,7 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeIndexes(JSONBuilder::JSONMap & map) const override; - const StorageID getStorageID() const { return data.getStorageID(); } + StorageID getStorageID() const { return data.getStorageID(); } UInt64 getSelectedParts() const { return selected_parts; } UInt64 getSelectedRows() const { return selected_rows; } UInt64 getSelectedMarks() const { return selected_marks; } diff --git a/src/Processors/Transforms/DistinctSortedChunkTransform.cpp b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp new file mode 100644 index 00000000000..064c827a8cc --- /dev/null +++ b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp @@ -0,0 +1,232 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SET_SIZE_LIMIT_EXCEEDED; +} + +DistinctSortedChunkTransform::DistinctSortedChunkTransform( + const Block & header_, + const SizeLimits & output_size_limits_, + UInt64 limit_hint_, + const SortDescription & sorted_columns_descr_, + const Names & source_columns) + : ISimpleTransform(header_, header_, true) + , limit_hint(limit_hint_) + , 
output_size_limits(output_size_limits_) + , sorted_columns_descr(sorted_columns_descr_) +{ + /// calculate sorted columns positions + sorted_columns_pos.reserve(sorted_columns_descr.size()); + for (auto const & descr : sorted_columns_descr) + { + size_t pos = header_.getPositionByName(descr.column_name); + sorted_columns_pos.emplace_back(pos); + } + + /// calculate non-sorted columns positions + other_columns_pos.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + { + size_t pos = header_.getPositionByName(source_column); + if (std::find(sorted_columns_pos.begin(), sorted_columns_pos.end(), pos) != sorted_columns_pos.end()) + continue; + + const auto & col = header_.getByPosition(pos).column; + if (col && !isColumnConst(*col)) + other_columns_pos.emplace_back(pos); + } + + /// reserve space in auxiliary column vectors for processing + sorted_columns.reserve(sorted_columns_pos.size()); + other_columns.reserve(other_columns_pos.size()); + current_key.reserve(sorted_columns.size()); +} + +void DistinctSortedChunkTransform::initChunkProcessing(const Columns & input_columns) +{ + sorted_columns.clear(); + for (size_t pos : sorted_columns_pos) + sorted_columns.emplace_back(input_columns[pos].get()); + + other_columns.clear(); + for (size_t pos : other_columns_pos) + other_columns.emplace_back(input_columns[pos].get()); + + if (!other_columns.empty() && data.type == ClearableSetVariants::Type::EMPTY) + data.init(ClearableSetVariants::chooseMethod(other_columns, other_columns_sizes)); +} + +size_t DistinctSortedChunkTransform::ordinaryDistinctOnRange(IColumn::Filter & filter, size_t range_begin, size_t range_end, bool clear_data) +{ + size_t count = 0; + switch (data.type) + { + case ClearableSetVariants::Type::EMPTY: + break; + // clang-format off +#define M(NAME) \ + case ClearableSetVariants::Type::NAME: \ + count = buildFilterForRange(*data.NAME, filter, range_begin, range_end, clear_data); \ + break; + + APPLY_FOR_SET_VARIANTS(M) +#undef M + // clang-format on + } + return count; +} + +template +size_t DistinctSortedChunkTransform::buildFilterForRange( + Method & method, IColumn::Filter & filter, size_t range_begin, size_t range_end, bool clear_data) +{ + typename Method::State state(other_columns, other_columns_sizes, nullptr); + if (clear_data) + method.data.clear(); + + size_t count = 0; + for (size_t i = range_begin; i < range_end; ++i) + { + auto emplace_result = state.emplaceKey(method.data, i, data.string_pool); + + /// emit the record if there is no such key in the current set, skip otherwise + filter[i] = emplace_result.isInserted(); + if (filter[i]) + ++count; + } + return count; +} + +void DistinctSortedChunkTransform::setCurrentKey(const size_t row_pos) +{ + current_key.clear(); + for (auto const & col : sorted_columns) + { + current_key.emplace_back(col->cloneEmpty()); + current_key.back()->insertFrom(*col, row_pos); + } +} + +bool DistinctSortedChunkTransform::isCurrentKey(const size_t row_pos) const +{ + for (size_t i = 0; i < sorted_columns.size(); ++i) + { + int res = current_key[i]->compareAt(0, row_pos, *sorted_columns[i], sorted_columns_descr[i].nulls_direction); + if (res != 0) + return false; + } + return true; +} + +size_t DistinctSortedChunkTransform::getRangeEnd(size_t begin, size_t end) const +{ + assert(begin < end); + + const size_t linear_probe_threadhold = 16; + size_t linear_probe_end = begin + linear_probe_threadhold; + if (linear_probe_end > end) + linear_probe_end = end; + + for (size_t pos = begin; pos < linear_probe_end; 
++pos) + { + if (!isCurrentKey(pos)) + return pos; + } + + size_t low = linear_probe_end; + size_t high = end - 1; + while (low <= high) + { + size_t mid = low + (high - low) / 2; + if (isCurrentKey(mid)) + low = mid + 1; + else + { + high = mid - 1; + end = mid; + } + } + return end; +} + +std::pair DistinctSortedChunkTransform::continueWithPrevRange(const size_t chunk_rows, IColumn::Filter & filter) +{ + /// current_key is empty on very first transform() call + /// or first row doesn't match a key from previous transform() + if (current_key.empty() || !isCurrentKey(0)) + return {0, 0}; + + size_t output_rows = 0; + const size_t range_end = getRangeEnd(0, chunk_rows); + if (other_columns.empty()) + std::fill(filter.begin(), filter.begin() + range_end, 0); /// skip rows already included in distinct on previous transform() + else + output_rows = ordinaryDistinctOnRange(filter, 0, range_end, false); + + return {range_end, output_rows}; +} + +void DistinctSortedChunkTransform::transform(Chunk & chunk) +{ + const size_t chunk_rows = chunk.getNumRows(); + if (unlikely(0 == chunk_rows)) + return; + + Columns input_columns = chunk.detachColumns(); + /// split input columns into sorted and other("non-sorted") columns + initChunkProcessing(input_columns); + + /// build filter: + /// (1) find range with the same values in sorted columns -> [range_begin, range_end) + /// (2) for found range + /// if there is no "non-sorted" columns: filter out all rows in range except first one + /// otherwise: apply ordinary distinct + /// (3) repeat until chunk is processed + IColumn::Filter filter(chunk_rows); + auto [range_begin, output_rows] = continueWithPrevRange(chunk_rows, filter); /// try to process chuck as continuation of previous one + size_t range_end = range_begin; + while (range_end != chunk_rows) + { + // set current key to find range + setCurrentKey(range_begin); + + // find new range [range_begin, range_end) + range_end = getRangeEnd(range_begin, chunk_rows); + + // update filter for range + if (other_columns.empty()) + { + filter[range_begin] = 1; + std::fill(filter.begin() + range_begin + 1, filter.begin() + range_end, 0); + ++output_rows; + } + else + { + // ordinary distinct in range if there are "non-sorted" columns + output_rows += ordinaryDistinctOnRange(filter, range_begin, range_end, true); + } + + // set where next range start + range_begin = range_end; + } + + /// apply the built filter + for (auto & input_column : input_columns) + input_column = input_column->filter(filter, output_rows); + + chunk.setColumns(std::move(input_columns), output_rows); + + /// Update total output rows and check limits + total_output_rows += output_rows; + if ((limit_hint && total_output_rows >= limit_hint) + || !output_size_limits.check(total_output_rows, data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) + { + stopReading(); + } +} + +} diff --git a/src/Processors/Transforms/DistinctSortedChunkTransform.h b/src/Processors/Transforms/DistinctSortedChunkTransform.h new file mode 100644 index 00000000000..2e21c36f7dc --- /dev/null +++ b/src/Processors/Transforms/DistinctSortedChunkTransform.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ +/// +/// DISTINCT optimization for sorted chunks +/// +/// (1) distinct columns are split into two groups - sorted i.e. belong to sorting prefix, +/// and non-sorted (other columns w/o sorting guarantees). +/// +/// (2) Rows are split into ranges. 
Range is a set of rows where the sorting prefix value is the same. +/// If there are no non-sorted columns, then we just skip all rows in range except one. +/// If there are non-sorted columns, then for each range, we use a hash table to find unique rows in a range. +/// +/// (3) The implementation also checks if current chunks is continuation of previous one, +/// i.e. sorting prefix value of last row in previous chunk is the same as of first row in current one, +/// so it can correctly process sorted stream as well. +/// For this, we don't clear sorting prefix value and hash table after a range is processed, +/// only right before a new range processing +/// +class DistinctSortedChunkTransform : public ISimpleTransform +{ +public: + DistinctSortedChunkTransform( + const Block & header_, + const SizeLimits & output_size_limits_, + UInt64 limit_hint_, + const SortDescription & sorted_columns_descr_, + const Names & source_columns_); + + String getName() const override { return "DistinctSortedChunkTransform"; } + +protected: + void transform(Chunk & chunk) override; + +private: + void initChunkProcessing(const Columns & input_columns); + std::pair continueWithPrevRange(size_t chunk_rows, IColumn::Filter & filter); + size_t ordinaryDistinctOnRange(IColumn::Filter & filter, size_t range_begin, size_t range_end, bool clear_data); + inline void setCurrentKey(size_t row_pos); + inline bool isCurrentKey(size_t row_pos) const; + inline size_t getRangeEnd(size_t range_begin, size_t range_end) const; + + template + size_t buildFilterForRange(Method & method, IColumn::Filter & filter, size_t range_begin, size_t range_end, bool clear_data); + + + ClearableSetVariants data; + const size_t limit_hint; + size_t total_output_rows = 0; + + /// Restrictions on the maximum size of the output data. + const SizeLimits output_size_limits; + + const SortDescription sorted_columns_descr; + ColumnNumbers sorted_columns_pos; + ColumnRawPtrs sorted_columns; // used during processing + + ColumnNumbers other_columns_pos; + Sizes other_columns_sizes; + ColumnRawPtrs other_columns; // used during processing + + MutableColumns current_key; +}; + +} diff --git a/src/Processors/Transforms/DistinctTransform.cpp b/src/Processors/Transforms/DistinctTransform.cpp index cf6a128aa40..0108b7d6547 100644 --- a/src/Processors/Transforms/DistinctTransform.cpp +++ b/src/Processors/Transforms/DistinctTransform.cpp @@ -17,17 +17,13 @@ DistinctTransform::DistinctTransform( , limit_hint(limit_hint_) , set_size_limits(set_size_limits_) { - size_t num_columns = columns_.empty() ? header_.columns() : columns_.size(); - - key_columns_pos.reserve(columns_.size()); + const size_t num_columns = columns_.empty() ? header_.columns() : columns_.size(); + key_columns_pos.reserve(num_columns); for (size_t i = 0; i < num_columns; ++i) { - auto pos = columns_.empty() ? i - : header_.getPositionByName(columns_[i]); - + const auto pos = columns_.empty() ? i : header_.getPositionByName(columns_[i]); const auto & col = header_.getByPosition(pos).column; - - if (!(col && isColumnConst(*col))) + if (col && !isColumnConst(*col)) key_columns_pos.emplace_back(pos); } } diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index a41b5660e0d..b63e1dda084 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -215,6 +215,13 @@ IProcessor::Status FillingTransform::prepare() if (first || filling_row < next_row) { + /// Output if has data. 
+ if (has_output) + { + output.pushData(std::move(output_data)); + has_output = false; + } + generate_suffix = true; return Status::Ready; } diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index f916ac8c2af..0d6beb1733b 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -18,6 +18,9 @@ #if USE_MYSQL #include #endif +#if USE_NATSIO +#include +#endif #include @@ -542,6 +545,11 @@ template bool getExternalDataSourceConfiguration(const ASTs & args, BaseSettings & settings, ContextPtr context); #endif +#if USE_NATSIO +template +bool getExternalDataSourceConfiguration(const ASTs & args, BaseSettings & settings, ContextPtr context); +#endif + template std::optional getExternalDataSourceConfiguration( const ASTs & args, ContextPtr context, bool is_database_engine, bool throw_on_no_collection, const BaseSettings & storage_settings); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 4bf77792559..323bcdc100d 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -9,16 +9,14 @@ #include #include #include -#include #include -#include #include #include #include -#include #include #include #include +#include #include #include #include @@ -805,8 +803,8 @@ void registerStorageFileLog(StorageFactory & factory) auto path_ast = evaluateConstantExpressionAsLiteral(engine_args[0], args.getContext()); auto format_ast = evaluateConstantExpressionAsLiteral(engine_args[1], args.getContext()); - auto path = path_ast->as().value.safeGet(); - auto format = format_ast->as().value.safeGet(); + auto path = checkAndGetLiteralArgument(path_ast, "path"); + auto format = checkAndGetLiteralArgument(format_ast, "format"); return std::make_shared( args.table_id, diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 708bfd5ef8b..5e811f8e42c 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -664,13 +665,13 @@ void registerStorageHDFS(StorageFactory & factory) engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - String url = engine_args[0]->as().value.safeGet(); + String url = checkAndGetLiteralArgument(engine_args[0], "url"); String format_name = "auto"; if (engine_args.size() > 1) { engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = engine_args[1]->as().value.safeGet(); + format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); } if (format_name == "auto") @@ -680,7 +681,7 @@ void registerStorageHDFS(StorageFactory & factory) if (engine_args.size() == 3) { engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = engine_args[2]->as().value.safeGet(); + compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); } else compression_method = "auto"; ASTPtr partition_by; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index b717d373598..b3e1ea93c94 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -37,7 +38,8 @@ #include #include #include -#include +#include + namespace 
DB { @@ -959,9 +961,9 @@ void registerStorageHive(StorageFactory & factory) for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); - const String & hive_metastore_url = engine_args[0]->as().value.safeGet(); - const String & hive_database = engine_args[1]->as().value.safeGet(); - const String & hive_table = engine_args[2]->as().value.safeGet(); + const String & hive_metastore_url = checkAndGetLiteralArgument(engine_args[0], "hive_metastore_url"); + const String & hive_database = checkAndGetLiteralArgument(engine_args[1], "hive_database"); + const String & hive_table = checkAndGetLiteralArgument(engine_args[2], "hive_table"); return std::make_shared( hive_metastore_url, hive_database, diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 0fcf48b9afc..8bbb2fa1c04 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -1,11 +1,9 @@ #include #include -#include #include #include #include -#include #include #include #include @@ -14,7 +12,8 @@ #include #include #include -#include +#include +#include namespace DB @@ -24,6 +23,7 @@ namespace ErrorCodes extern const int TABLE_IS_DROPPED; extern const int NOT_IMPLEMENTED; extern const int DEADLOCK_AVOIDED; + extern const int CANNOT_RESTORE_TABLE; } bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const @@ -248,48 +248,21 @@ bool IStorage::isStaticStorage() const return false; } -ASTPtr IStorage::getCreateQueryForBackup(const ContextPtr & context, DatabasePtr * database) const +void IStorage::adjustCreateQueryForBackup(ASTPtr &) const { - auto table_id = getStorageID(); - auto db = DatabaseCatalog::instance().tryGetDatabase(table_id.getDatabaseName()); - if (!db) - throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name); - ASTPtr query = db->tryGetCreateTableQuery(table_id.getTableName(), context); - if (!query) - throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name); - - /// We don't want to see any UUIDs in backup (after RESTORE the table will have another UUID anyway). - auto & create = query->as(); - create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; - - /// If this is a definition of a system table we'll remove columns and comment because they're excessive for backups. - if (create.storage && create.storage->engine && create.storage->engine->name.starts_with("System")) - { - create.reset(create.columns_list); - create.reset(create.comment); - } - - if (database) - *database = db; - - return query; -} - -ASTPtr IStorage::getCreateQueryForBackup(const BackupEntriesCollector & backup_entries_collector) const -{ - DatabasePtr database; - auto query = getCreateQueryForBackup(backup_entries_collector.getContext(), &database); - database->checkCreateTableQueryForBackup(query, backup_entries_collector); - return query; } void IStorage::backupData(BackupEntriesCollector &, const String &, const std::optional &) { } -void IStorage::restoreDataFromBackup(RestorerFromBackup &, const String &, const std::optional &) +void IStorage::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional &) { + /// If an inherited class doesn't override restoreDataFromBackup() that means it doesn't backup any data. 
+ auto filenames = restorer.getBackup()->listFiles(data_path_in_backup); + if (!filenames.empty()) + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: Folder {} in backup must be empty", + getStorageID().getFullTableName(), data_path_in_backup); } std::string PrewhereInfo::dump() const diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 6dd329db02b..e265c94eb11 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -223,10 +223,8 @@ public: /// Initially reserved virtual column name may be shadowed by real column. bool isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const; - /// Returns a slightly changed version of the CREATE TABLE query which must be written to a backup. - /// The function can throw `TABLE_IS_DROPPED` if this storage is not attached to a database. - virtual ASTPtr getCreateQueryForBackup(const ContextPtr & context, DatabasePtr * database) const; - virtual ASTPtr getCreateQueryForBackup(const BackupEntriesCollector & backup_entries_collector) const; + /// Modify a CREATE TABLE query to make a variant which must be written to a backup. + virtual void adjustCreateQueryForBackup(ASTPtr & create_query) const; /// Makes backup entries to backup the data of this storage. virtual void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions); @@ -234,6 +232,9 @@ public: /// Extracts data from the backup and put it to the storage. virtual void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions); + /// Returns true if the storage supports backup/restore for specific partitions. + virtual bool supportsBackupPartition() const { return false; } + private: StorageID storage_id; diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp index 02dca993436..c5966d9e322 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -14,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -156,11 +155,11 @@ MeiliSearchConfiguration StorageMeiliSearch::getConfiguration(ASTs engine_args, for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); - String url = engine_args[0]->as().value.safeGet(); - String index = engine_args[1]->as().value.safeGet(); + String url = checkAndGetLiteralArgument(engine_args[0], "url"); + String index = checkAndGetLiteralArgument(engine_args[1], "index"); String key; if (engine_args.size() == 3) - key = engine_args[2]->as().value.safeGet(); + key = checkAndGetLiteralArgument(engine_args[2], "key"); return MeiliSearchConfiguration(url, index, key); } } diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 083cbc90cb1..fc4a4554304 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -18,7 +18,6 @@ namespace DB namespace ErrorCodes { - extern const int FILE_DOESNT_EXIST; extern const int DIRECTORY_ALREADY_EXISTS; extern const int NOT_ENOUGH_SPACE; extern const int LOGICAL_ERROR; @@ -199,6 +198,11 @@ void DataPartStorageOnDisk::checkConsistency(const MergeTreeDataPartChecksums & checksums.checkSizes(volume->getDisk(), getRelativePath()); } 
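For context, the adjusted IStorage backup/restore hooks above are exercised by the BACKUP and RESTORE statements; a minimal usage sketch (the disk name 'backups', the archive name, and the table name are assumptions, not part of this patch):

BACKUP TABLE test.hits TO Disk('backups', 'hits_backup.zip');
RESTORE TABLE test.hits FROM Disk('backups', 'hits_backup.zip');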
+DataPartStorageBuilderPtr DataPartStorageOnDisk::getBuilder() const +{ + return std::make_shared(volume, root_path, part_dir); +} + void DataPartStorageOnDisk::remove( bool can_remove_shared_data, const NameSet & names_not_to_remove, @@ -615,36 +619,50 @@ void DataPartStorageOnDisk::backup( TemporaryFilesOnDisks & temp_dirs, const MergeTreeDataPartChecksums & checksums, const NameSet & files_without_checksums, + const String & path_in_backup, BackupEntries & backup_entries) const { - auto disk = volume->getDisk(); + fs::path part_path_on_disk = fs::path{root_path} / part_dir; + fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir; + auto disk = volume->getDisk(); auto temp_dir_it = temp_dirs.find(disk); if (temp_dir_it == temp_dirs.end()) - temp_dir_it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp/backup_")).first; + temp_dir_it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp/backup/")).first; auto temp_dir_owner = temp_dir_it->second; fs::path temp_dir = temp_dir_owner->getPath(); - - fs::path temp_part_dir = temp_dir / part_dir; + fs::path temp_part_dir = temp_dir / part_path_in_backup.relative_path(); disk->createDirectories(temp_part_dir); + /// For example, + /// part_path_in_backup = /data/test/table/0_1_1_0 + /// part_path_on_disk = store/f57/f5728353-44bb-4575-85e8-28deb893657a/0_1_1_0 + /// tmp_part_dir = tmp/backup/1aaaaaa/data/test/table/0_1_1_0 + /// Or, for projections: + /// part_path_in_backup = /data/test/table/0_1_1_0/prjmax.proj + /// part_path_on_disk = store/f57/f5728353-44bb-4575-85e8-28deb893657a/0_1_1_0/prjmax.proj + /// tmp_part_dir = tmp/backup/1aaaaaa/data/test/table/0_1_1_0/prjmax.proj + for (const auto & [filepath, checksum] : checksums.files) { - String relative_filepath = fs::path(part_dir) / filepath; - String full_filepath = fs::path(root_path) / part_dir / filepath; + if (filepath.ends_with(".proj")) + continue; /// Skip *.proj files - they're actually directories and will be handled. + String filepath_on_disk = part_path_on_disk / filepath; + String filepath_in_backup = part_path_in_backup / filepath; String hardlink_filepath = temp_part_dir / filepath; - disk->createHardLink(full_filepath, hardlink_filepath); + + disk->createHardLink(filepath_on_disk, hardlink_filepath); UInt128 file_hash{checksum.file_hash.first, checksum.file_hash.second}; backup_entries.emplace_back( - relative_filepath, + filepath_in_backup, std::make_unique(disk, hardlink_filepath, checksum.file_size, file_hash, temp_dir_owner)); } for (const auto & filepath : files_without_checksums) { - String relative_filepath = fs::path(part_dir) / filepath; - String full_filepath = fs::path(root_path) / part_dir / filepath; - backup_entries.emplace_back(relative_filepath, std::make_unique(disk, full_filepath)); + String filepath_on_disk = part_path_on_disk / filepath; + String filepath_in_backup = part_path_in_backup / filepath; + backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk)); } } @@ -691,15 +709,19 @@ DataPartStoragePtr DataPartStorageOnDisk::clone( return std::make_shared(single_disk_volume, to, dir_path); } -void DataPartStorageOnDisk::rename(const std::string & new_root_path, const std::string & new_part_dir, Poco::Logger * log, bool remove_new_dir_if_exists, bool fsync_part_dir) +void DataPartStorageOnDisk::onRename(const std::string & new_root_path, const std::string & new_part_dir) { - if (!exists()) - throw Exception( - ErrorCodes::FILE_DOESNT_EXIST, - "Part directory {} doesn't exist. 
Most likely it is a logical error.", - std::string(fs::path(volume->getDisk()->getPath()) / root_path / part_dir)); + part_dir = new_part_dir; + root_path = new_root_path; +} - /// Why "" ? +void DataPartStorageBuilderOnDisk::rename( + const std::string & new_root_path, + const std::string & new_part_dir, + Poco::Logger * log, + bool remove_new_dir_if_exists, + bool fsync_part_dir) +{ String to = fs::path(new_root_path) / new_part_dir / ""; if (volume->getDisk()->exists(to)) @@ -714,7 +736,7 @@ void DataPartStorageOnDisk::rename(const std::string & new_root_path, const std: "Part directory {} already exists and contains {} files. Removing it.", fullPath(volume->getDisk(), to), files.size()); - volume->getDisk()->removeRecursive(to); + transaction->removeRecursive(to); } else { @@ -725,17 +747,13 @@ void DataPartStorageOnDisk::rename(const std::string & new_root_path, const std: } } - // metadata_manager->deleteAll(true); - // metadata_manager->assertAllDeleted(true); - String from = getRelativePath(); /// Why? - volume->getDisk()->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); - volume->getDisk()->moveDirectory(from, to); + transaction->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); + transaction->moveDirectory(from, to); part_dir = new_part_dir; root_path = new_root_path; - // metadata_manager->updateAll(true); SyncGuardPtr sync_guard; if (fsync_part_dir) @@ -768,41 +786,43 @@ void DataPartStorageOnDisk::changeRootPath(const std::string & from_root, const root_path = to_root.substr(0, dst_size) + root_path.substr(prefix_size); } -DataPartStorageBuilderOnDisk::DataPartStorageBuilderOnDisk(VolumePtr volume_, std::string root_path_, std::string part_dir_) - : volume(std::move(volume_)), root_path(std::move(root_path_)), part_dir(std::move(part_dir_)) +DataPartStorageBuilderOnDisk::DataPartStorageBuilderOnDisk( + VolumePtr volume_, + std::string root_path_, + std::string part_dir_) + : volume(std::move(volume_)) + , root_path(std::move(root_path_)) + , part_dir(std::move(part_dir_)) + , transaction(volume->getDisk()->createTransaction()) { } -std::unique_ptr DataPartStorageBuilderOnDisk::readFile( - const std::string & name, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const -{ - return volume->getDisk()->readFile(fs::path(root_path) / part_dir / name, settings, read_hint, file_size); -} - std::unique_ptr DataPartStorageBuilderOnDisk::writeFile( const String & name, size_t buf_size, const WriteSettings & settings) { - return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / name, buf_size, WriteMode::Rewrite, settings); + return transaction->writeFile(fs::path(root_path) / part_dir / name, buf_size, WriteMode::Rewrite, settings, /* autocommit = */ false); } void DataPartStorageBuilderOnDisk::removeFile(const String & name) { - return volume->getDisk()->removeFile(fs::path(root_path) / part_dir / name); + transaction->removeFile(fs::path(root_path) / part_dir / name); +} + +void DataPartStorageBuilderOnDisk::removeFileIfExists(const String & name) +{ + transaction->removeFileIfExists(fs::path(root_path) / part_dir / name); } void DataPartStorageBuilderOnDisk::removeRecursive() { - volume->getDisk()->removeRecursive(fs::path(root_path) / part_dir); + transaction->removeRecursive(fs::path(root_path) / part_dir); } void DataPartStorageBuilderOnDisk::removeSharedRecursive(bool keep_in_remote_fs) { - volume->getDisk()->removeSharedRecursive(fs::path(root_path) / part_dir, keep_in_remote_fs, {}); + 
transaction->removeSharedRecursive(fs::path(root_path) / part_dir, keep_in_remote_fs, {}); } SyncGuardPtr DataPartStorageBuilderOnDisk::getDirectorySyncGuard() const @@ -819,7 +839,7 @@ void DataPartStorageBuilderOnDisk::createHardLinkFrom(const IDataPartStorage & s "Cannot create hardlink from different storage. Expected DataPartStorageOnDisk, got {}", typeid(source).name()); - volume->getDisk()->createHardLink( + transaction->createHardLink( fs::path(source_on_disk->getRelativePath()) / from, fs::path(root_path) / part_dir / to); } @@ -829,12 +849,6 @@ bool DataPartStorageBuilderOnDisk::exists() const return volume->getDisk()->exists(fs::path(root_path) / part_dir); } - -bool DataPartStorageBuilderOnDisk::exists(const std::string & name) const -{ - return volume->getDisk()->exists(fs::path(root_path) / part_dir / name); -} - std::string DataPartStorageBuilderOnDisk::getFullPath() const { return fs::path(volume->getDisk()->getPath()) / root_path / part_dir; @@ -847,12 +861,13 @@ std::string DataPartStorageBuilderOnDisk::getRelativePath() const void DataPartStorageBuilderOnDisk::createDirectories() { - return volume->getDisk()->createDirectories(fs::path(root_path) / part_dir); + LOG_INFO(&Poco::Logger::get("DEBUG"), "CREATING DIRECTORY {}", (fs::path(root_path) / part_dir).string()); + transaction->createDirectories(fs::path(root_path) / part_dir); } void DataPartStorageBuilderOnDisk::createProjection(const std::string & name) { - return volume->getDisk()->createDirectory(fs::path(root_path) / part_dir / name); + transaction->createDirectory(fs::path(root_path) / part_dir / name); } ReservationPtr DataPartStorageBuilderOnDisk::reserve(UInt64 bytes) @@ -874,4 +889,9 @@ DataPartStoragePtr DataPartStorageBuilderOnDisk::getStorage() const return std::make_shared(volume, root_path, part_dir); } +void DataPartStorageBuilderOnDisk::commit() +{ + transaction->commit(); +} + } diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDisk.h index d6fcb2f1442..2426b5eee80 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.h +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include @@ -53,6 +54,7 @@ public: std::string getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached) const override; void setRelativePath(const std::string & path) override; + void onRename(const std::string & new_root_path, const std::string & new_part_dir) override; std::string getDiskName() const override; std::string getDiskType() const override; @@ -88,6 +90,7 @@ public: TemporaryFilesOnDisks & temp_dirs, const MergeTreeDataPartChecksums & checksums, const NameSet & files_without_checksums, + const String & path_in_backup, BackupEntries & backup_entries) const override; DataPartStoragePtr freeze( @@ -103,10 +106,9 @@ public: const DiskPtr & disk, Poco::Logger * log) const override; - void rename(const std::string & new_root_path, const std::string & new_part_dir, Poco::Logger * log, bool remove_new_dir_if_exists, bool fsync_part_dir) override; - void changeRootPath(const std::string & from_root, const std::string & to_root) override; + DataPartStorageBuilderPtr getBuilder() const override; private: VolumePtr volume; std::string root_path; @@ -130,7 +132,6 @@ public: void setRelativePath(const std::string & path) override; bool exists() const override; - bool exists(const std::string & name) const override; void createDirectories() override; void createProjection(const std::string & name) 
override; @@ -139,18 +140,13 @@ public: std::string getFullPath() const override; std::string getRelativePath() const override; - std::unique_ptr readFile( - const std::string & name, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - std::unique_ptr writeFile( const String & name, size_t buf_size, const WriteSettings & settings) override; void removeFile(const String & name) override; + void removeFileIfExists(const String & name) override; void removeRecursive() override; void removeSharedRecursive(bool keep_in_remote_fs) override; @@ -164,10 +160,20 @@ public: DataPartStoragePtr getStorage() const override; + void rename( + const std::string & new_root_path, + const std::string & new_part_dir, + Poco::Logger * log, + bool remove_new_dir_if_exists, + bool fsync_part_dir) override; + + void commit() override; + private: VolumePtr volume; std::string root_path; std::string part_dir; + DiskTransactionPtr transaction; }; } diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 313bde658cb..3eedac2888a 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -347,7 +347,7 @@ void Service::sendPartFromDiskRemoteMeta(const MergeTreeData::DataPartPtr & part /// Serialized metadatadatas with zero ref counts. auto metadatas = data_part_storage_on_disk->getSerializedMetadata(paths); - String part_id = part->getUniqueId(); + String part_id = data_part_storage_on_disk->getUniqueId(); writeStringBinary(part_id, out); writeBinary(checksums.files.size(), out); @@ -869,12 +869,12 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( part_relative_path, part_dir); - if (data_part_storage_builder->exists()) - throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists.", data_part_storage_builder->getFullPath()); + if (data_part_storage->exists()) + throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists.", data_part_storage->getFullPath()); CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch}; - data_part_storage_builder->createDirectories(); + volume->getDisk()->createDirectories(data_part_storage->getFullPath()); size_t files; readBinary(files, in); @@ -887,7 +887,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( readStringBinary(file_name, in); readBinary(file_size, in); - String metadata_file = fs::path(data_part_storage_builder->getFullPath()) / file_name; + String metadata_file = fs::path(data_part_storage->getFullPath()) / file_name; { auto file_out = std::make_unique(metadata_file, DBMS_DEFAULT_BUFFER_SIZE, -1, 0666, nullptr, 0); @@ -902,6 +902,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( /// performing a poll with a not very large timeout. /// And now we check it only between read chunks (in the `copyData` function). 
data_part_storage_builder->removeSharedRecursive(true); + data_part_storage_builder->commit(); throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); } @@ -919,6 +920,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( assertEOF(in); + data_part_storage_builder->commit(); + MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(part_name, data_part_storage); new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); new_data_part->is_temp = true; diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 0e165e74ed0..f0173baecb7 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -53,6 +53,9 @@ struct WriteSettings; class TemporaryFileOnDisk; +class IDataPartStorageBuilder; +using DataPartStorageBuilderPtr = std::shared_ptr; + /// This is an abstraction of storage for data part files. /// Ideally, it is assumed to contains read-only methods from IDisk. /// It is not fulfilled now, but let's try our best. @@ -122,6 +125,7 @@ public: /// Reset part directory, used for im-memory parts. /// TODO: remove it. virtual void setRelativePath(const std::string & path) = 0; + virtual void onRename(const std::string & new_root_path, const std::string & new_part_dir) = 0; /// Some methods from IDisk. Needed to avoid getting internal IDisk interface. virtual std::string getDiskName() const = 0; @@ -173,6 +177,7 @@ public: TemporaryFilesOnDisks & temp_dirs, const MergeTreeDataPartChecksums & checksums, const NameSet & files_without_checksums, + const String & path_in_backup, BackupEntries & backup_entries) const = 0; /// Creates hardlinks into 'to/dir_path' for every file in data part. @@ -191,20 +196,14 @@ public: const DiskPtr & disk, Poco::Logger * log) const = 0; - /// Rename part. - /// Ideally, new_root_path should be the same as current root (but it is not true). - /// Examples are: 'all_1_2_1' -> 'detached/all_1_2_1' - /// 'moving/tmp_all_1_2_1' -> 'all_1_2_1' - virtual void rename( - const std::string & new_root_path, - const std::string & new_part_dir, - Poco::Logger * log, - bool remove_new_dir_if_exists, - bool fsync_part_dir) = 0; - /// Change part's root. from_root should be a prefix path of current root path. /// Right now, this is needed for rename table query. virtual void changeRootPath(const std::string & from_root, const std::string & to_root) = 0; + + /// Leak of abstraction as well. We should use builder as one-time object which allow + /// us to build parts, while storage should be read-only method to access part properties + /// related to disk. However our code is really tricky and sometimes we need ad-hoc builders. 
+ virtual DataPartStorageBuilderPtr getBuilder() const = 0; }; using DataPartStoragePtr = std::shared_ptr; @@ -223,20 +222,14 @@ public: virtual std::string getRelativePath() const = 0; virtual bool exists() const = 0; - virtual bool exists(const std::string & name) const = 0; virtual void createDirectories() = 0; virtual void createProjection(const std::string & name) = 0; - virtual std::unique_ptr readFile( - const std::string & name, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const = 0; - virtual std::unique_ptr writeFile(const String & name, size_t buf_size, const WriteSettings & settings) = 0; virtual void removeFile(const String & name) = 0; + virtual void removeFileIfExists(const String & name) = 0; virtual void removeRecursive() = 0; virtual void removeSharedRecursive(bool keep_in_remote_fs) = 0; @@ -249,8 +242,21 @@ public: virtual std::shared_ptr getProjection(const std::string & name) const = 0; virtual DataPartStoragePtr getStorage() const = 0; + + /// Rename part. + /// Ideally, new_root_path should be the same as current root (but it is not true). + /// Examples are: 'all_1_2_1' -> 'detached/all_1_2_1' + /// 'moving/tmp_all_1_2_1' -> 'all_1_2_1' + /// + /// To notify storage also call onRename for it with first two args + virtual void rename( + const std::string & new_root_path, + const std::string & new_part_dir, + Poco::Logger * log, + bool remove_new_dir_if_exists, + bool fsync_part_dir) = 0; + + virtual void commit() = 0; }; -using DataPartStorageBuilderPtr = std::shared_ptr; - } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 3cb3d052b0c..60941108f00 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1358,20 +1358,7 @@ bool IMergeTreeDataPart::shallParticipateInMerges(const StoragePolicyPtr & stora return data_part_storage->shallParticipateInMerges(*storage_policy); } -// UInt64 IMergeTreeDataPart::calculateTotalSizeOnDisk(const DataPartStoragePtr & data_part_storage_, const String & from) -// { -// if (data_part_storage_->isFile(from)) -// return data_part_storage_->getFileSize(from); -// std::vector files; -// disk_->listFiles(from, files); -// UInt64 res = 0; -// for (const auto & file : files) -// res += calculateTotalSizeOnDisk(data_part_storage_, fs::path(from) / file); -// return res; -// } - - -void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) const +void IMergeTreeDataPart::renameTo(const String & new_relative_path, bool remove_new_dir_if_exists, DataPartStorageBuilderPtr builder) const try { assertOnDisk(); @@ -1390,7 +1377,8 @@ try metadata_manager->deleteAll(true); metadata_manager->assertAllDeleted(true); - data_part_storage->rename(to.parent_path(), to.filename(), storage.log, remove_new_dir_if_exists, fsync_dir); + builder->rename(to.parent_path(), to.filename(), storage.log, remove_new_dir_if_exists, fsync_dir); + data_part_storage->onRename(to.parent_path(), to.filename()); metadata_manager->updateAll(true); for (const auto & [p_name, part] : projection_parts) @@ -1486,9 +1474,9 @@ String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) return "detached/" + getRelativePathForPrefix(prefix, /* detached */ true); } -void IMergeTreeDataPart::renameToDetached(const String & prefix) const +void IMergeTreeDataPart::renameToDetached(const String & prefix, DataPartStorageBuilderPtr builder) const { - 
renameTo(getRelativePathForDetachedPart(prefix), true); + renameTo(getRelativePathForDetachedPart(prefix), true, builder); part_is_probably_removed_from_disk = true; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 66a6fe065f7..7f3c41ce4c2 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -35,9 +35,6 @@ struct FutureMergedMutatedPart; class IReservation; using ReservationPtr = std::unique_ptr; -class IVolume; -using VolumePtr = std::shared_ptr; - class IMergeTreeReader; class IMergeTreeDataPartWriter; class MarkCache; @@ -341,11 +338,11 @@ public: size_t getFileSizeOrZero(const String & file_name) const; /// Moves a part to detached/ directory and adds prefix to its name - void renameToDetached(const String & prefix) const; + void renameToDetached(const String & prefix, DataPartStorageBuilderPtr builder) const; /// Makes checks and move part to new directory /// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly - virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) const; + virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists, DataPartStorageBuilderPtr builder) const; /// Makes clone of a part in detached/ directory via hard links virtual void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 4121262e7b5..0e99d6ce04e 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -260,11 +260,12 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWriter write_part_log) { part = merge_task->getFuture().get(); + auto builder = merge_task->getBuilder(); /// Task is not needed merge_task.reset(); - storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, *transaction_ptr); + storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, *transaction_ptr, builder); try { diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index cc5e87956a1..0dcdd927e7b 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -115,9 +115,10 @@ void MergePlainMergeTreeTask::prepare() void MergePlainMergeTreeTask::finish() { new_part = merge_task->getFuture().get(); + auto builder = merge_task->getBuilder(); MergeTreeData::Transaction transaction(storage, txn.get()); - storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction); + storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction, builder); transaction.commit(); write_part_log({}); diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index bb86a5072e0..0ddeb70763c 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -102,6 +102,11 @@ public: return global_ctx->promise.get_future(); } + DataPartStorageBuilderPtr getBuilder() + { + return global_ctx->data_part_storage_builder; + } + bool execute(); private: diff --git a/src/Storages/MergeTree/MergeTreeData.cpp 
b/src/Storages/MergeTree/MergeTreeData.cpp index 2c4dcfa05ee..e5dc11e9916 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -150,6 +150,7 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int TOO_MANY_SIMULTANEOUS_QUERIES; extern const int INCORRECT_QUERY; + extern const int CANNOT_RESTORE_TABLE; } static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool allow_sampling_expression_not_in_primary_key, bool check_sample_column_is_correct) @@ -1317,7 +1318,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal, part_lock); for (auto & part : broken_parts_to_detach) - part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes + { + auto builder = part->data_part_storage->getBuilder(); + part->renameToDetached("broken-on-start", builder); /// detached parts must not have '_' in prefixes + builder->commit(); + } for (auto & part : duplicate_parts_to_remove) part->remove(); @@ -2795,11 +2800,12 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( bool MergeTreeData::renameTempPartAndAdd( MutableDataPartPtr & part, Transaction & out_transaction, + DataPartStorageBuilderPtr builder, DataPartsLock & lock) { DataPartsVector covered_parts; - if (!renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts)) + if (!renameTempPartAndReplaceImpl(part, out_transaction, lock, builder, &covered_parts)) return false; if (!covered_parts.empty()) @@ -2833,22 +2839,22 @@ void MergeTreeData::checkPartCanBeAddedToTable(MutableDataPartPtr & part, DataPa } } -void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename) +void MergeTreeData::preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, DataPartStorageBuilderPtr builder) { part->is_temp = false; part->setState(DataPartState::PreActive); - if (need_rename) - part->renameTo(part->name, true); + part->renameTo(part->name, true, builder); data_parts_indexes.insert(part); - out_transaction.precommitted_parts.insert(part); + out_transaction.addPart(part, builder); } bool MergeTreeData::renameTempPartAndReplaceImpl( MutableDataPartPtr & part, Transaction & out_transaction, DataPartsLock & lock, + DataPartStorageBuilderPtr builder, DataPartsVector * out_covered_parts) { LOG_TRACE(log, "Renaming temporary part {} to {}.", part->data_part_storage->getPartDirectory(), part->name); @@ -2870,7 +2876,7 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( /// All checks are passed. Now we can rename the part on disk. 
/// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts - preparePartForCommit(part, out_transaction, /* need_rename = */ true); + preparePartForCommit(part, out_transaction, builder); if (out_covered_parts) { @@ -2886,20 +2892,21 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplaceUnlocked( MutableDataPartPtr & part, Transaction & out_transaction, + DataPartStorageBuilderPtr builder, DataPartsLock & lock) { DataPartsVector covered_parts; - renameTempPartAndReplaceImpl(part, out_transaction, lock, &covered_parts); - + renameTempPartAndReplaceImpl(part, out_transaction, lock, builder, &covered_parts); return covered_parts; } MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, - Transaction & out_transaction) + Transaction & out_transaction, + DataPartStorageBuilderPtr builder) { auto part_lock = lockParts(); - return renameTempPartAndReplaceUnlocked(part, out_transaction, part_lock); + return renameTempPartAndReplaceUnlocked(part, out_transaction, builder, part_lock); } void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock) @@ -3104,7 +3111,9 @@ void MergeTreeData::forgetPartAndMoveToDetached(const MergeTreeData::DataPartPtr modifyPartState(it_part, DataPartState::Deleting); - part->renameToDetached(prefix); + auto builder = part->data_part_storage->getBuilder(); + part->renameToDetached(prefix, builder); + builder->commit(); data_parts_indexes.erase(it_part); @@ -3449,7 +3458,7 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) if (original_active_part->data_part_storage->supportZeroCopyReplication() && part_copy->data_part_storage->supportZeroCopyReplication() && - original_active_part->getUniqueId() == part_copy->getUniqueId()) + original_active_part->data_part_storage->getUniqueId() == part_copy->data_part_storage->getUniqueId()) { /// May be when several volumes use the same S3/HDFS storage original_active_part->force_keep_shared_data = true; @@ -3993,14 +4002,23 @@ BackupEntries MergeTreeData::backupParts(const ContextPtr & local_context, const BackupEntries backup_entries; std::map> temp_dirs; - fs::path data_path_in_backup_fs = data_path_in_backup; for (const auto & part : data_parts) - part->data_part_storage->backup(temp_dirs, part->checksums, part->getFileNamesWithoutChecksums(), backup_entries); + { + part->data_part_storage->backup( + temp_dirs, part->checksums, part->getFileNamesWithoutChecksums(), data_path_in_backup, backup_entries); - /// TODO: try to write better code later. 
- for (auto & entry : backup_entries) - entry.first = data_path_in_backup_fs / entry.first; + auto projection_parts = part->getProjectionParts(); + for (const auto & [projection_name, projection_part] : projection_parts) + { + projection_part->data_part_storage->backup( + temp_dirs, + projection_part->checksums, + projection_part->getFileNamesWithoutChecksums(), + fs::path{data_path_in_backup} / part->name, + backup_entries); + } + } return backup_entries; } @@ -4083,7 +4101,10 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const { const auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version); if (!part_info) - continue; + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't look like the name of a part", + getStorageID().getFullTableName(), String{data_path_in_backup_fs / part_name}); + } if (partition_ids && !partition_ids->contains(part_info->partition_id)) continue; @@ -4116,27 +4137,39 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr r auto disk = reservation->getDisk(); String part_name = part_info.getPartName(); - auto temp_part_dir_owner = std::make_shared(disk, relative_data_path + "restoring_" + part_name + "_"); - String temp_part_dir = temp_part_dir_owner->getPath(); + auto temp_part_dir_owner = std::make_shared(disk, fs::path{relative_data_path} / ("restoring_" + part_name + "_")); + fs::path temp_part_dir = temp_part_dir_owner->getPath(); disk->createDirectories(temp_part_dir); + std::unordered_set subdirs; - assert(temp_part_dir.starts_with(relative_data_path)); - String relative_temp_part_dir = temp_part_dir.substr(relative_data_path.size()); + /// temp_part_name = "restoring__", for example "restoring_0_1_1_0_1baaaaa" + String temp_part_name = temp_part_dir.filename(); for (const String & filename : filenames) { + /// Needs to create subdirectories before copying the files. Subdirectories are used to represent projections. + auto separator_pos = filename.rfind('/'); + if (separator_pos != String::npos) + { + String subdir = filename.substr(0, separator_pos); + if (subdirs.emplace(subdir).second) + disk->createDirectories(temp_part_dir / subdir); + } + + /// TODO Transactions: Decide what to do with version metadata (if any). Let's just skip it for now. + if (filename.ends_with(IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)) + continue; + auto backup_entry = backup->readFile(part_path_in_backup_fs / filename); auto read_buffer = backup_entry->getReadBuffer(); - auto write_buffer = disk->writeFile(fs::path(temp_part_dir) / filename); + auto write_buffer = disk->writeFile(temp_part_dir / filename); copyData(*read_buffer, *write_buffer); reservation->update(reservation->getSize() - backup_entry->getSize()); } auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); - auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, relative_temp_part_dir); + auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, temp_part_name); auto part = createPart(part_name, part_info, data_part_storage); - /// TODO Transactions: Decide what to do with version metadata (if any). Let's just remove it for now. 
- disk->removeFileIfExists(fs::path(temp_part_dir) / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME); part->version.setCreationTID(Tx::PrehistoricTID, nullptr); part->loadColumnsChecksumsIndexes(false, true); @@ -4832,6 +4865,12 @@ void MergeTreeData::Transaction::rollbackPartsToTemporaryState() clear(); } +void MergeTreeData::Transaction::addPart(MutableDataPartPtr & part, DataPartStorageBuilderPtr builder) +{ + precommitted_parts.insert(part); + part_builders.push_back(builder); +} + void MergeTreeData::Transaction::rollback() { if (!isEmpty()) @@ -4862,6 +4901,8 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: auto parts_lock = acquired_parts_lock ? MergeTreeData::DataPartsLock() : data.lockParts(); auto * owing_parts_lock = acquired_parts_lock ? acquired_parts_lock : &parts_lock; + for (auto & builder : part_builders) + builder->commit(); if (txn) { @@ -5438,7 +5479,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (analysis_result.before_where) { candidate.where_column_name = analysis_result.where_column_name; - candidate.remove_where_filter = analysis_result.remove_where_filter; + candidate.remove_where_filter = !required_columns.contains(analysis_result.where_column_name); candidate.before_where = analysis_result.before_where->clone(); auto new_required_columns = candidate.before_where->foldActionsByProjection( @@ -5571,20 +5612,9 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg candidate.before_aggregation->reorderAggregationKeysForProjection(key_name_pos_map); candidate.before_aggregation->addAggregatesViaProjection(aggregates); - // minmax_count_projections only have aggregation actions - if (minmax_count_projection) - candidate.required_columns = {required_columns.begin(), required_columns.end()}; - if (rewrite_before_where(candidate, projection, required_columns, sample_block_for_keys, aggregates)) { - if (minmax_count_projection) - { - candidate.before_where = nullptr; - candidate.prewhere_info = nullptr; - } - else - candidate.required_columns = {required_columns.begin(), required_columns.end()}; - + candidate.required_columns = {required_columns.begin(), required_columns.end()}; for (const auto & aggregate : aggregates) candidate.required_columns.push_back(aggregate.name); candidates.push_back(std::move(candidate)); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4fd7dd7d3cf..1b86df8f898 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -256,6 +256,8 @@ public: DataPartsVector commit(MergeTreeData::DataPartsLock * acquired_parts_lock = nullptr); + void addPart(MutableDataPartPtr & part, DataPartStorageBuilderPtr builder); + void rollback(); /// Immediately remove parts from table's data_parts set and change part @@ -283,6 +285,7 @@ public: MergeTreeData & data; MergeTreeTransaction * txn; DataParts precommitted_parts; + std::vector part_builders; DataParts locked_parts; void clear() { precommitted_parts.clear(); } @@ -554,18 +557,21 @@ public: bool renameTempPartAndAdd( MutableDataPartPtr & part, Transaction & transaction, + DataPartStorageBuilderPtr builder, DataPartsLock & lock); /// The same as renameTempPartAndAdd but the block range of the part can contain existing parts. /// Returns all parts covered by the added part (in ascending order). 
DataPartsVector renameTempPartAndReplace( MutableDataPartPtr & part, - Transaction & out_transaction); + Transaction & out_transaction, + DataPartStorageBuilderPtr builder); /// Unlocked version of previous one. Useful when added multiple parts with a single lock. DataPartsVector renameTempPartAndReplaceUnlocked( MutableDataPartPtr & part, Transaction & out_transaction, + DataPartStorageBuilderPtr builder, DataPartsLock & lock); /// Remove parts from working set immediately (without wait for background @@ -716,6 +722,9 @@ public: /// Extract data from the backup and put it to the storage. void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; + /// Returns true if the storage supports backup/restore for specific partitions. + bool supportsBackupPartition() const override { return true; } + /// Moves partition to specified Disk void movePartitionToDisk(const ASTPtr & partition, const String & name, bool moving_part, ContextPtr context); @@ -1244,7 +1253,7 @@ private: /// Preparing itself to be committed in memory: fill some fields inside part, add it to data_parts_indexes /// in precommitted state and to transasction - void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, bool need_rename); + void preparePartForCommit(MutableDataPartPtr & part, Transaction & out_transaction, DataPartStorageBuilderPtr builder); /// Low-level method for preparing parts for commit (in-memory). /// FIXME Merge MergeTreeTransaction and Transaction @@ -1252,6 +1261,7 @@ private: MutableDataPartPtr & part, Transaction & out_transaction, DataPartsLock & lock, + DataPartStorageBuilderPtr builder, DataPartsVector * out_covered_parts); /// RAII Wrapper for atomic work with currently moving parts diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index c740489b76d..11fd861882b 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -541,7 +541,8 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, const MergeTreeTransactionPtr & txn, - MergeTreeData::Transaction & out_transaction) + MergeTreeData::Transaction & out_transaction, + DataPartStorageBuilderPtr builder) { /// Some of source parts was possibly created in transaction, so non-transactional merge may break isolation. if (data.transactions_enabled.load(std::memory_order_relaxed) && !txn) @@ -549,7 +550,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart "but transactions were enabled for this table"); /// Rename new part, add to the set and remove original parts. - auto replaced_parts = data.renameTempPartAndReplace(new_data_part, out_transaction); + auto replaced_parts = data.renameTempPartAndReplace(new_data_part, out_transaction, builder); /// Let's check that all original parts have been deleted and only them. 
if (replaced_parts.size() != parts.size()) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 587b6b26347..14eb82c641c 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -133,7 +133,8 @@ public: MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, const MergeTreeTransactionPtr & txn, - MergeTreeData::Transaction & out_transaction); + MergeTreeData::Transaction & out_transaction, + DataPartStorageBuilderPtr builder); /// The approximate amount of disk space needed for merge or mutation. With a surplus. diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 4c87daa1e13..b22356a38ed 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -144,7 +144,7 @@ void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const flushToDisk(detached_path, metadata_snapshot); } -void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */) const +void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */, DataPartStorageBuilderPtr) const { data_part_storage->setRelativePath(new_relative_path); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index c9caf043b7b..d985c7f055e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -47,7 +47,7 @@ public: bool isStoredOnRemoteDiskWithZeroCopySupport() const override { return false; } bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } - void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) const override; + void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists, DataPartStorageBuilderPtr) const override; void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; DataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index db3580e1f86..e3925940553 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -414,12 +414,14 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai String mrk_path = escaped_name + marks_file_extension; String bin_path = escaped_name + DATA_FILE_EXTENSION; + auto data_part_storage = data_part_storage_builder->getStorage(); + /// Some columns may be removed because of ttl. Skip them. 
- if (!data_part_storage_builder->exists(mrk_path)) + if (!data_part_storage->exists(mrk_path)) return; - auto mrk_in = data_part_storage_builder->readFile(mrk_path, {}, std::nullopt, std::nullopt); - DB::CompressedReadBufferFromFile bin_in(data_part_storage_builder->readFile(bin_path, {}, std::nullopt, std::nullopt)); + auto mrk_in = data_part_storage->readFile(mrk_path, {}, std::nullopt, std::nullopt); + DB::CompressedReadBufferFromFile bin_in(data_part_storage->readFile(bin_path, {}, std::nullopt, std::nullopt)); bool must_be_last = false; UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index f7c544132bb..89042e25a0e 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -410,15 +410,17 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( if (new_data_part->data_part_storage->exists()) { - LOG_WARNING(log, "Removing old temporary directory {}", new_data_part->data_part_storage->getFullPath()); - data_part_volume->getDisk()->removeRecursive(full_path); + LOG_WARNING(log, "Removing old temporary directory {}", full_path); + data_part_storage_builder->removeRecursive(); } - const auto disk = data_part_volume->getDisk(); - disk->createDirectories(full_path); + data_part_storage_builder->createDirectories(); if (data.getSettings()->fsync_part_directory) + { + const auto disk = data_part_volume->getDisk(); sync_guard = disk->getDirectorySyncGuard(full_path); + } } if (metadata_snapshot->hasRowsTTL()) @@ -457,6 +459,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( { auto proj_temp_part = writeProjectionPart(data, log, projection_block, projection, data_part_storage_builder, new_data_part.get()); new_data_part->addProjectionPart(projection.name, std::move(proj_temp_part.part)); + proj_temp_part.builder->commit(); for (auto & stream : proj_temp_part.streams) temp_part.streams.emplace_back(std::move(stream)); } @@ -469,6 +472,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( context->getWriteSettings()); temp_part.part = new_data_part; + temp_part.builder = data_part_storage_builder; temp_part.streams.emplace_back(TemporaryPart::Stream{.stream = std::move(out), .finalizer = std::move(finalizer)}); ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows()); @@ -585,10 +589,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( out->writeWithPermutation(block, perm_ptr); auto finalizer = out->finalizePartAsync(new_data_part, false); temp_part.part = new_data_part; + temp_part.builder = projection_part_storage_builder; temp_part.streams.emplace_back(TemporaryPart::Stream{.stream = std::move(out), .finalizer = std::move(finalizer)}); - // out.finish(new_data_part, std::move(written_files), false); - ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterRows, block.rows()); ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterUncompressedBytes, block.bytes()); ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterCompressedBytes, new_data_part->getBytesOnDisk()); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 147b38e828a..29c7baaa775 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -34,7 +34,10 @@ using 
BlocksWithPartition = std::vector; class MergeTreeDataWriter { public: - explicit MergeTreeDataWriter(MergeTreeData & data_) : data(data_), log(&Poco::Logger::get(data.getLogName() + " (Writer)")) {} + explicit MergeTreeDataWriter(MergeTreeData & data_) + : data(data_) + , log(&Poco::Logger::get(data.getLogName() + " (Writer)")) + {} /** Split the block to blocks, each of them must be written as separate part. * (split rows by partition) @@ -51,6 +54,7 @@ public: struct TemporaryPart { MergeTreeData::MutableDataPartPtr part; + DataPartStorageBuilderPtr builder; struct Stream { diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 2df17702c03..a723fa6d8e3 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -264,8 +264,11 @@ void MergeTreePartsMover::swapClonedPart(const MergeTreeData::DataPartPtr & clon return; } + auto builder = cloned_part->data_part_storage->getBuilder(); /// Don't remove new directory but throw an error because it may contain part which is currently in use. - cloned_part->renameTo(active_part->name, false); + cloned_part->renameTo(active_part->name, false, builder); + + builder->commit(); /// TODO what happen if server goes down here? data->swapActivePart(cloned_part); diff --git a/src/Storages/MergeTree/MergeTreePartsMover.h b/src/Storages/MergeTree/MergeTreePartsMover.h index 45bbce01d00..6ad658c2cb3 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.h +++ b/src/Storages/MergeTree/MergeTreePartsMover.h @@ -50,14 +50,14 @@ public: const std::lock_guard & moving_parts_lock); /// Copies part to selected reservation in detached folder. Throws exception if part already exists. - std::shared_ptr clonePart(const MergeTreeMoveEntry & moving_part) const; + MergeTreeDataPartPtr clonePart(const MergeTreeMoveEntry & moving_part) const; /// Replaces cloned part from detached directory into active data parts set. /// Replacing part changes state to DeleteOnDestroy and will be removed from disk after destructor of ///IMergeTreeDataPart called. If replacing part doesn't exists or not active (committed) than /// cloned part will be removed and log message will be reported. It may happen in case of concurrent /// merge or mutation. - void swapClonedPart(const std::shared_ptr & cloned_parts) const; + void swapClonedPart(const MergeTreeDataPartPtr & cloned_parts) const; /// Can stop background moves and moves from queries ActionBlocker moves_blocker; diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 2f860b34fd5..81c5708f220 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -139,6 +139,9 @@ void MergeTreeSink::finishDelayedChunk() bool added = false; + /// It's important to create it outside of lock scope because + /// otherwise it can lock parts in destructor and deadlock is possible. 
+ MergeTreeData::Transaction transaction(storage, context->getCurrentTransaction().get()); { auto lock = storage.lockParts(); storage.fillNewPartName(part, lock); @@ -155,19 +158,15 @@ void MergeTreeSink::finishDelayedChunk() } else { - MergeTreeData::Transaction transaction(storage, context->getCurrentTransaction().get()); - added = storage.renameTempPartAndAdd(part, transaction, lock); + added = storage.renameTempPartAndAdd(part, transaction, partition.temp_part.builder, lock); transaction.commit(&lock); - } } else { - MergeTreeData::Transaction transaction(storage, context->getCurrentTransaction().get()); - added = storage.renameTempPartAndAdd(part, transaction, lock); + added = storage.renameTempPartAndAdd(part, transaction, partition.temp_part.builder, lock); transaction.commit(&lock); } - } /// Part can be deduplicated, so increment counters and add to part log only if it's really added diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 740e57a136e..d48a8b90646 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -81,9 +81,7 @@ MergedColumnOnlyOutputStream::fillChecksums( for (const String & removed_file : removed_files) { - /// Can be called multiple times, don't need to remove file twice - if (data_part_storage_builder->exists(removed_file)) - data_part_storage_builder->removeFile(removed_file); + data_part_storage_builder->removeFileIfExists(removed_file); if (all_checksums.files.contains(removed_file)) all_checksums.files.erase(removed_file); diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index ac4de51bbd9..a51eb7854ab 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -170,8 +170,12 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWriter write_part_log) { new_part = mutate_task->getFuture().get(); + auto builder = mutate_task->getBuilder(); - storage.renameTempPartAndReplace(new_part, *transaction_ptr); + if (!builder) + builder = new_part->data_part_storage->getBuilder(); + + storage.renameTempPartAndReplace(new_part, *transaction_ptr, builder); try { diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 6214919337b..0cf10ee1935 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -68,13 +68,13 @@ bool MutatePlainMergeTreeTask::executeStep() switch (state) { - case State::NEED_PREPARE : + case State::NEED_PREPARE: { prepare(); state = State::NEED_EXECUTE; return true; } - case State::NEED_EXECUTE : + case State::NEED_EXECUTE: { try { @@ -83,10 +83,14 @@ bool MutatePlainMergeTreeTask::executeStep() new_part = mutate_task->getFuture().get(); + auto builder = mutate_task->getBuilder(); + if (!builder) + builder = new_part->data_part_storage->getBuilder(); + MergeTreeData::Transaction transaction(storage, merge_mutate_entry->txn.get()); /// FIXME Transactions: it's too optimistic, better to lock parts before starting transaction - storage.renameTempPartAndReplace(new_part, transaction); + storage.renameTempPartAndReplace(new_part, transaction, builder); transaction.commit(); storage.updateMutationEntriesErrors(future_part, true, ""); 
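The comment added in MergeTreeSink::finishDelayedChunk explains why the MergeTreeData::Transaction is now constructed before lockParts() is taken: its destructor may roll back, and rollback needs to lock parts itself. The self-contained sketch below illustrates that RAII ordering rule; PartsSet and Transaction here are illustrative stand-ins rather than the real ClickHouse classes.

#include <iostream>
#include <mutex>
#include <vector>

struct PartsSet
{
    std::mutex mutex;              // analogue of the lockParts() mutex
    std::vector<int> precommitted;

    void rollback()
    {
        std::lock_guard lock(mutex);   // rollback needs the parts lock itself
        precommitted.clear();
        std::cout << "rolled back\n";
    }
};

class Transaction
{
public:
    explicit Transaction(PartsSet & parts_) : parts(parts_) {}
    void commit() { committed = true; }
    ~Transaction()
    {
        if (!committed)
            parts.rollback();   // would deadlock if the parts lock were still held here
    }
private:
    PartsSet & parts;
    bool committed = false;
};

void finishDelayedChunk(PartsSet & parts, bool fail)
{
    /// Construct the transaction *before* taking the parts lock, so its
    /// destructor (which may lock parts for rollback) runs only after the
    /// lock_guard below has already been released.
    Transaction transaction(parts);
    {
        std::lock_guard lock(parts.mutex);
        parts.precommitted.push_back(42);   // register the part under the lock
        if (!fail)
            transaction.commit();
    }   // lock released here; ~Transaction runs later and can lock safely
}

int main()
{
    PartsSet parts;
    finishDelayedChunk(parts, /*fail=*/true);   // prints "rolled back" without deadlocking
}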
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index d39f3f704a1..3a5aa2f8860 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -46,6 +46,7 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } + /** Split mutation commands into two parts: * First part should be executed by mutations interpreter. * Other is just simple drop/renames, so they can be executed without interpreter. @@ -690,8 +691,7 @@ public: MergeTreeData::MutableDataPartsVector && parts_, const ProjectionDescription & projection_, size_t & block_num_, - MutationContextPtr ctx_ - ) + MutationContextPtr ctx_) : name(std::move(name_)) , parts(std::move(parts_)) , projection(projection_) @@ -736,9 +736,11 @@ public: if (next_level_parts.empty()) { LOG_DEBUG(log, "Merged a projection part in level {}", current_level); - selected_parts[0]->renameTo(projection.name + ".proj", true); + auto builder = selected_parts[0]->data_part_storage->getBuilder(); + selected_parts[0]->renameTo(projection.name + ".proj", true, builder); selected_parts[0]->name = projection.name; selected_parts[0]->is_temp = false; + builder->commit(); ctx->new_data_part->addProjectionPart(name, std::move(selected_parts[0])); /// Task is finished @@ -944,6 +946,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, projection_block, projection, ctx->data_part_storage_builder, ctx->new_data_part.get(), ++block_num); + tmp_part.builder->commit(); tmp_part.finalize(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); } @@ -966,6 +969,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, projection_block, projection, ctx->data_part_storage_builder, ctx->new_data_part.get(), ++block_num); + temp_part.builder->commit(); temp_part.finalize(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); } @@ -1552,5 +1556,9 @@ const MergeTreeData::HardlinkedFiles & MutateTask::getHardlinkedFiles() const return ctx->hardlinked_files; } +DataPartStorageBuilderPtr MutateTask::getBuilder() const +{ + return ctx->data_part_storage_builder; +} } diff --git a/src/Storages/MergeTree/MutateTask.h b/src/Storages/MergeTree/MutateTask.h index f4848dac3b3..1f2e8a6fd20 100644 --- a/src/Storages/MergeTree/MutateTask.h +++ b/src/Storages/MergeTree/MutateTask.h @@ -46,6 +46,8 @@ public: const MergeTreeData::HardlinkedFiles & getHardlinkedFiles() const; + DataPartStorageBuilderPtr getBuilder() const; + private: bool prepare(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 07e21def184..34d64b92d69 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -254,7 +254,7 @@ void ReplicatedMergeTreeSink::finishDelayedChunk(zkutil::ZooKeeperPtr & zookeepe try { - commitPart(zookeeper, part, partition.block_id); + commitPart(zookeeper, part, partition.block_id, partition.temp_part.builder); last_block_is_duplicate = last_block_is_duplicate || part->is_duplicate; @@ -289,7 +289,7 @@ void ReplicatedMergeTreeSink::writeExistingPart(MergeTreeData::MutableDataPartPt try { part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - commitPart(zookeeper, part, ""); + commitPart(zookeeper, 
part, "", part->data_part_storage->getBuilder()); PartLog::addNewPart(storage.getContext(), part, watch.elapsed()); } catch (...) @@ -301,7 +301,10 @@ void ReplicatedMergeTreeSink::writeExistingPart(MergeTreeData::MutableDataPartPt void ReplicatedMergeTreeSink::commitPart( - zkutil::ZooKeeperPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const String & block_id) + zkutil::ZooKeeperPtr & zookeeper, + MergeTreeData::MutableDataPartPtr & part, + const String & block_id, + DataPartStorageBuilderPtr builder) { metadata_snapshot->check(part->getColumns()); assertSessionIsNotExpired(zookeeper); @@ -471,13 +474,15 @@ void ReplicatedMergeTreeSink::commitPart( /// Information about the part. storage.getCommitPartOps(ops, part, block_id_path); + /// It's important to create it outside of lock scope because + /// otherwise it can lock parts in destructor and deadlock is possible. MergeTreeData::Transaction transaction(storage, NO_TRANSACTION_RAW); /// If you can not add a part to ZK, we'll remove it back from the working set. bool renamed = false; try { auto lock = storage.lockParts(); - renamed = storage.renameTempPartAndAdd(part, transaction, lock); + renamed = storage.renameTempPartAndAdd(part, transaction, builder, lock); } catch (const Exception & e) { @@ -541,7 +546,8 @@ void ReplicatedMergeTreeSink::commitPart( transaction.rollbackPartsToTemporaryState(); part->is_temp = true; - part->renameTo(temporary_part_relative_path, false); + part->renameTo(temporary_part_relative_path, false, builder); + builder->commit(); /// If this part appeared on other replica than it's better to try to write it locally one more time. If it's our part /// than it will be ignored on the next itration. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 41953e034fa..f7504d2f784 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -69,7 +69,11 @@ private: void checkQuorumPrecondition(zkutil::ZooKeeperPtr & zookeeper); /// Rename temporary part and commit to ZooKeeper. - void commitPart(zkutil::ZooKeeperPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const String & block_id); + void commitPart( + zkutil::ZooKeeperPtr & zookeeper, + MergeTreeData::MutableDataPartPtr & part, + const String & block_id, + DataPartStorageBuilderPtr part_builder); /// Wait for quorum to be satisfied on path (quorum_path) form part (part_name) /// Also checks that replica still alive. 
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 7dee7b8d0f8..ea90179caa3 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -353,4 +354,123 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl return diff; } +StorageInMemoryMetadata ReplicatedMergeTreeTableMetadata::Diff::getNewMetadata(const ColumnsDescription & new_columns, ContextPtr context, const StorageInMemoryMetadata & old_metadata) const +{ + StorageInMemoryMetadata new_metadata = old_metadata; + new_metadata.columns = new_columns; + + if (!empty()) + { + auto parse_key_expr = [] (const String & key_expr) + { + ParserNotEmptyExpressionList parser(false); + auto new_sorting_key_expr_list = parseQuery(parser, key_expr, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + + ASTPtr order_by_ast; + if (new_sorting_key_expr_list->children.size() == 1) + order_by_ast = new_sorting_key_expr_list->children[0]; + else + { + auto tuple = makeASTFunction("tuple"); + tuple->arguments->children = new_sorting_key_expr_list->children; + order_by_ast = tuple; + } + return order_by_ast; + }; + + if (sorting_key_changed) + { + auto order_by_ast = parse_key_expr(new_sorting_key); + + new_metadata.sorting_key.recalculateWithNewAST(order_by_ast, new_metadata.columns, context); + + if (new_metadata.primary_key.definition_ast == nullptr) + { + /// Primary and sorting key become independent after this ALTER so we have to + /// save the old ORDER BY expression as the new primary key. + auto old_sorting_key_ast = old_metadata.getSortingKey().definition_ast; + new_metadata.primary_key = KeyDescription::getKeyFromAST( + old_sorting_key_ast, new_metadata.columns, context); + } + } + + if (sampling_expression_changed) + { + if (!new_sampling_expression.empty()) + { + auto sample_by_ast = parse_key_expr(new_sampling_expression); + new_metadata.sampling_key.recalculateWithNewAST(sample_by_ast, new_metadata.columns, context); + } + else /// SAMPLE BY was removed + { + new_metadata.sampling_key = {}; + } + } + + if (skip_indices_changed) + new_metadata.secondary_indices = IndicesDescription::parse(new_skip_indices, new_columns, context); + + if (constraints_changed) + new_metadata.constraints = ConstraintsDescription::parse(new_constraints); + + if (projections_changed) + new_metadata.projections = ProjectionsDescription::parse(new_projections, new_columns, context); + + if (ttl_table_changed) + { + if (!new_ttl_table.empty()) + { + ParserTTLExpressionList parser; + auto ttl_for_table_ast = parseQuery(parser, new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( + ttl_for_table_ast, new_metadata.columns, context, new_metadata.primary_key); + } + else /// TTL was removed + { + new_metadata.table_ttl = TTLTableDescription{}; + } + } + } + + /// Changes in columns may affect following metadata fields + new_metadata.column_ttls_by_name.clear(); + for (const auto & [name, ast] : new_metadata.columns.getColumnTTLs()) + { + auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, new_metadata.columns, context, new_metadata.primary_key); + new_metadata.column_ttls_by_name[name] = new_ttl_entry; + } + + if (new_metadata.partition_key.definition_ast != nullptr) + new_metadata.partition_key.recalculateWithNewColumns(new_metadata.columns, 
context); + + if (!sorting_key_changed) /// otherwise already updated + new_metadata.sorting_key.recalculateWithNewColumns(new_metadata.columns, context); + + /// Primary key is special, it exists even if not defined + if (new_metadata.primary_key.definition_ast != nullptr) + { + new_metadata.primary_key.recalculateWithNewColumns(new_metadata.columns, context); + } + else + { + new_metadata.primary_key = KeyDescription::getKeyFromAST(new_metadata.sorting_key.definition_ast, new_metadata.columns, context); + new_metadata.primary_key.definition_ast = nullptr; + } + + if (!sampling_expression_changed && new_metadata.sampling_key.definition_ast != nullptr) + new_metadata.sampling_key.recalculateWithNewColumns(new_metadata.columns, context); + + if (!skip_indices_changed) /// otherwise already updated + { + for (auto & index : new_metadata.secondary_indices) + index.recalculateWithNewColumns(new_metadata.columns, context); + } + + if (!ttl_table_changed && new_metadata.table_ttl.definition_ast != nullptr) + new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( + new_metadata.table_ttl.definition_ast, new_metadata.columns, context, new_metadata.primary_key); + + return new_metadata; +} + } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 6d510d20304..eb2d087e988 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -66,6 +66,8 @@ struct ReplicatedMergeTreeTableMetadata return !sorting_key_changed && !sampling_expression_changed && !skip_indices_changed && !projections_changed && !ttl_table_changed && !constraints_changed; } + + StorageInMemoryMetadata getNewMetadata(const ColumnsDescription & new_columns, ContextPtr context, const StorageInMemoryMetadata & old_metadata) const; }; void checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const; diff --git a/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp b/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp new file mode 100644 index 00000000000..45d667047af --- /dev/null +++ b/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +std::optional tryExtractZkPathFromCreateQuery(const IAST & create_query, const ContextPtr & global_context) +{ + const auto * create = create_query.as(); + if (!create || !create->storage || !create->storage->engine) + return {}; + + /// Check if the table engine is one of the ReplicatedMergeTree family. + const auto & ast_engine = *create->storage->engine; + if (!ast_engine.name.starts_with("Replicated") || !ast_engine.name.ends_with("MergeTree")) + return {}; + + /// Get the first argument. + const auto * ast_arguments = typeid_cast(ast_engine.arguments.get()); + if (!ast_arguments || ast_arguments->children.empty()) + return {}; + + auto * ast_zk_path = typeid_cast(ast_arguments->children[0].get()); + if (!ast_zk_path || (ast_zk_path->value.getType() != Field::Types::String)) + return {}; + + String zk_path = ast_zk_path->value.safeGet(); + + /// Expand macros. 
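+    /// The table's name, database and UUID from the CREATE query feed the {table},
+    /// {database} and {uuid} macros; for a table inside a Replicated database the
+    /// {shard} and {replica} macros are taken from the database itself, mirroring
+    /// (in simplified form) what registerStorageMergeTree does.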
+ Macros::MacroExpansionInfo info; + info.table_id.table_name = create->getTable(); + info.table_id.database_name = create->getDatabase(); + info.table_id.uuid = create->uuid; + auto database = DatabaseCatalog::instance().tryGetDatabase(info.table_id.database_name); + if (database && database->getEngineName() == "Replicated") + { + info.shard = getReplicatedDatabaseShardName(database); + info.replica = getReplicatedDatabaseReplicaName(database); + } + + try + { + zk_path = global_context->getMacros()->expand(zk_path, info); + } + catch (...) + { + return {}; /// Couldn't expand macros. + } + + return zk_path; +} + +} diff --git a/src/Storages/MergeTree/extractZkPathFromCreateQuery.h b/src/Storages/MergeTree/extractZkPathFromCreateQuery.h new file mode 100644 index 00000000000..e22f76d2cd5 --- /dev/null +++ b/src/Storages/MergeTree/extractZkPathFromCreateQuery.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class IAST; +class Context; +using ContextPtr = std::shared_ptr; + +/// Extracts a zookeeper path from a specified CREATE TABLE query. Returns std::nullopt if fails. +/// The function takes the first argument of the ReplicatedMergeTree table engine and expands macros in it. +/// It works like a part of what the create() function in registerStorageMergeTree.cpp does but in a simpler manner. +std::optional tryExtractZkPathFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); + +} diff --git a/src/Storages/NATS/Buffer_fwd.h b/src/Storages/NATS/Buffer_fwd.h new file mode 100644 index 00000000000..3eb52314a79 --- /dev/null +++ b/src/Storages/NATS/Buffer_fwd.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace DB +{ + +class ReadBufferFromNATSConsumer; +using ConsumerBufferPtr = std::shared_ptr; + +class WriteBufferToNATSProducer; +using ProducerBufferPtr = std::shared_ptr; + +} diff --git a/src/Storages/NATS/NATSConnection.cpp b/src/Storages/NATS/NATSConnection.cpp new file mode 100644 index 00000000000..359754bb144 --- /dev/null +++ b/src/Storages/NATS/NATSConnection.cpp @@ -0,0 +1,151 @@ +#include "NATSConnection.h" + +#include +#include + +#include + + +namespace DB +{ + +//static const auto CONNECT_SLEEP = 200; +static const auto RETRIES_MAX = 20; +static const auto CONNECTED_TO_BUFFER_SIZE = 256; + + +NATSConnectionManager::NATSConnectionManager(const NATSConfiguration & configuration_, Poco::Logger * log_) + : configuration(configuration_) + , log(log_) + , event_handler(loop.getLoop(), log) +{ +} + + +NATSConnectionManager::~NATSConnectionManager() +{ + if (has_connection) + natsConnection_Destroy(connection); +} + +String NATSConnectionManager::connectionInfoForLog() const +{ + if (!configuration.url.empty()) + { + return "url : " + configuration.url; + } + return "cluster: " + boost::algorithm::join(configuration.servers, ", "); +} + +bool NATSConnectionManager::isConnected() +{ + std::lock_guard lock(mutex); + return isConnectedImpl(); +} + +bool NATSConnectionManager::connect() +{ + std::lock_guard lock(mutex); + connectImpl(); + return isConnectedImpl(); +} + +bool NATSConnectionManager::reconnect() +{ + std::lock_guard lock(mutex); + if (isConnectedImpl()) + return true; + + disconnectImpl(); + + LOG_DEBUG(log, "Trying to restore connection to NATS {}", connectionInfoForLog()); + connectImpl(); + + return isConnectedImpl(); +} + +void NATSConnectionManager::disconnect() +{ + std::lock_guard lock(mutex); + disconnectImpl(); +} + +bool NATSConnectionManager::closed() +{ + std::lock_guard lock(mutex); + return 
natsConnection_IsClosed(connection); +} + +bool NATSConnectionManager::isConnectedImpl() const +{ + return connection && has_connection && !natsConnection_IsClosed(connection); +} + +void NATSConnectionManager::connectImpl() +{ + natsOptions * options = event_handler.getOptions(); + if (!configuration.username.empty() && !configuration.password.empty()) + natsOptions_SetUserInfo(options, configuration.username.c_str(), configuration.password.c_str()); + if (!configuration.token.empty()) + natsOptions_SetToken(options, configuration.token.c_str()); + + if (configuration.secure) + { + natsOptions_SetSecure(options, true); + natsOptions_SkipServerVerification(options, true); + } + if (!configuration.url.empty()) + { + natsOptions_SetURL(options, configuration.url.c_str()); + } + else + { + const char * servers[configuration.servers.size()]; + for (size_t i = 0; i < configuration.servers.size(); ++i) + { + servers[i] = configuration.servers[i].c_str(); + } + natsOptions_SetServers(options, servers, configuration.servers.size()); + } + natsOptions_SetMaxReconnect(options, configuration.max_reconnect); + natsOptions_SetReconnectWait(options, configuration.reconnect_wait); + natsOptions_SetDisconnectedCB(options, disconnectedCallback, log); + natsOptions_SetReconnectedCB(options, reconnectedCallback, log); + natsStatus status; + { + auto lock = event_handler.setThreadLocalLoop(); + status = natsConnection_Connect(&connection, options); + } + if (status == NATS_OK) + has_connection = true; + else + LOG_DEBUG(log, "New connection to {} failed. Nats status text: {}. Last error message: {}", + connectionInfoForLog(), natsStatus_GetText(status), nats_GetLastError(nullptr)); +} + +void NATSConnectionManager::disconnectImpl() +{ + if (!has_connection) + return; + + natsConnection_Close(connection); + + size_t cnt_retries = 0; + while (!natsConnection_IsClosed(connection) && cnt_retries++ != RETRIES_MAX) + event_handler.iterateLoop(); +} + +void NATSConnectionManager::reconnectedCallback(natsConnection * nc, void * log) +{ + char buffer[CONNECTED_TO_BUFFER_SIZE]; + buffer[0] = '\0'; + natsConnection_GetConnectedUrl(nc, buffer, sizeof(buffer)); + LOG_DEBUG(static_cast(log), "Got reconnected to NATS server: {}.", buffer); +} + +void NATSConnectionManager::disconnectedCallback(natsConnection *, void * log) +{ + LOG_DEBUG(static_cast(log), "Got disconnected from NATS server."); +} + +} diff --git a/src/Storages/NATS/NATSConnection.h b/src/Storages/NATS/NATSConnection.h new file mode 100644 index 00000000000..78a273164db --- /dev/null +++ b/src/Storages/NATS/NATSConnection.h @@ -0,0 +1,73 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +struct NATSConfiguration +{ + String url; + std::vector servers; + + String username; + String password; + String token; + + int max_reconnect; + int reconnect_wait; + + bool secure; +}; + +class NATSConnectionManager +{ +public: + NATSConnectionManager(const NATSConfiguration & configuration_, Poco::Logger * log_); + ~NATSConnectionManager(); + + bool isConnected(); + + bool connect(); + + bool reconnect(); + + void disconnect(); + + bool closed(); + + /// NATSHandler is thread safe. Any public methods can be called concurrently. 
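+    /// Typical usage (see StorageNATS): connect() once, then drive the shared libuv
+    /// loop via getHandler().startLoop() from a background task, and call
+    /// getHandler().iterateLoop() whenever pending events need to be pumped.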
+ NATSHandler & getHandler() { return event_handler; } + natsConnection * getConnection() { return connection; } + + String connectionInfoForLog() const; + +private: + bool isConnectedImpl() const; + + void connectImpl(); + + void disconnectImpl(); + + static void disconnectedCallback(natsConnection * nc, void * log); + static void reconnectedCallback(natsConnection * nc, void * log); + + NATSConfiguration configuration; + Poco::Logger * log; + + UVLoop loop; + NATSHandler event_handler; + + + natsConnection * connection; + // true if at any point successfully connected to NATS + bool has_connection = false; + + std::mutex mutex; +}; + +using NATSConnectionManagerPtr = std::shared_ptr; + +} diff --git a/src/Storages/NATS/NATSHandler.cpp b/src/Storages/NATS/NATSHandler.cpp new file mode 100644 index 00000000000..b5812bc3349 --- /dev/null +++ b/src/Storages/NATS/NATSHandler.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include + +namespace DB +{ + +/* The object of this class is shared between concurrent consumers (who share the same connection == share the same + * event loop and handler). + */ + +static const auto MAX_THREAD_WORK_DURATION_MS = 60000; + +NATSHandler::NATSHandler(uv_loop_t * loop_, Poco::Logger * log_) : + loop(loop_), + log(log_), + loop_running(false), + loop_state(Loop::STOP) +{ + natsLibuv_Init(); + natsLibuv_SetThreadLocalLoop(loop); + natsOptions_Create(&opts); + natsOptions_SetEventLoop(opts, static_cast(loop), + natsLibuv_Attach, + natsLibuv_Read, + natsLibuv_Write, + natsLibuv_Detach); + natsOptions_SetIOBufSize(opts, INT_MAX); + natsOptions_SetSendAsap(opts, true); +} + +void NATSHandler::startLoop() +{ + std::lock_guard lock(startup_mutex); + natsLibuv_SetThreadLocalLoop(loop); + + LOG_DEBUG(log, "Background loop started"); + loop_running.store(true); + auto start_time = std::chrono::steady_clock::now(); + auto end_time = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(end_time - start_time); + + while (loop_state.load() == Loop::RUN && duration.count() < MAX_THREAD_WORK_DURATION_MS) + { + uv_run(loop, UV_RUN_NOWAIT); + end_time = std::chrono::steady_clock::now(); + duration = std::chrono::duration_cast(end_time - start_time); + } + + LOG_DEBUG(log, "Background loop ended"); + loop_running.store(false); +} + +void NATSHandler::iterateLoop() +{ + std::unique_lock lock(startup_mutex, std::defer_lock); + if (lock.try_lock()) + { + natsLibuv_SetThreadLocalLoop(loop); + uv_run(loop, UV_RUN_NOWAIT); + } +} + +LockPtr NATSHandler::setThreadLocalLoop() +{ + auto lock = std::make_unique>(startup_mutex); + natsLibuv_SetThreadLocalLoop(loop); + return lock; +} + +void NATSHandler::stopLoop() +{ + LOG_DEBUG(log, "Implicit loop stop."); + uv_stop(loop); +} + +NATSHandler::~NATSHandler() +{ + natsOptions_Destroy(opts); +} + +} diff --git a/src/Storages/NATS/NATSHandler.h b/src/Storages/NATS/NATSHandler.h new file mode 100644 index 00000000000..e3894c888a3 --- /dev/null +++ b/src/Storages/NATS/NATSHandler.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace Loop +{ + static const UInt8 RUN = 1; + static const UInt8 STOP = 2; +} + +using SubscriptionPtr = std::unique_ptr; +using LockPtr = std::unique_ptr>; + +class NATSHandler +{ +public: + NATSHandler(uv_loop_t * loop_, Poco::Logger * log_); + + ~NATSHandler(); + + /// Loop for background thread worker. + void startLoop(); + + /// Loop to wait for small tasks in a non-blocking mode. 
+ /// Adds synchronization with main background loop. + void iterateLoop(); + + LockPtr setThreadLocalLoop(); + + void stopLoop(); + bool loopRunning() const { return loop_running.load(); } + + void updateLoopState(UInt8 state) { loop_state.store(state); } + UInt8 getLoopState() { return loop_state.load(); } + + natsOptions * getOptions() { return opts; } + +private: + uv_loop_t * loop; + natsOptions * opts = nullptr; + Poco::Logger * log; + + std::atomic loop_running; + std::atomic loop_state; + std::mutex startup_mutex; +}; + +} diff --git a/src/Storages/NATS/NATSSettings.cpp b/src/Storages/NATS/NATSSettings.cpp new file mode 100644 index 00000000000..ffdb79247d2 --- /dev/null +++ b/src/Storages/NATS/NATSSettings.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(NATSSettingsTraits, LIST_OF_NATS_SETTINGS) + +void NATSSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage_def.engine->name); + throw; + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} +} diff --git a/src/Storages/NATS/NATSSettings.h b/src/Storages/NATS/NATSSettings.h new file mode 100644 index 00000000000..6029aaea9f6 --- /dev/null +++ b/src/Storages/NATS/NATSSettings.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +namespace DB +{ +class ASTStorage; + +#define NATS_RELATED_SETTINGS(M) \ + M(String, nats_url, "", "A host-port to connect to NATS server.", 0) \ + M(String, nats_subjects, "", "List of subject for NATS table to subscribe/publish to.", 0) \ + M(String, nats_format, "", "The message format.", 0) \ + M(Char, nats_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ + M(String, nats_schema, "", "Schema identifier (used by schema-based formats) for NATS engine", 0) \ + M(UInt64, nats_num_consumers, 1, "The number of consumer channels per table.", 0) \ + M(String, nats_queue_group, "", "Name for queue group of NATS subscribers.", 0) \ + M(Bool, nats_secure, false, "Use SSL connection", 0) \ + M(UInt64, nats_max_reconnect, 5, "Maximum amount of reconnection attempts.", 0) \ + M(UInt64, nats_reconnect_wait, 2000, "Amount of time in milliseconds to sleep between each reconnect attempt.", 0) \ + M(String, nats_server_list, "", "Server list for connection", 0) \ + M(UInt64, nats_skip_broken_messages, 0, "Skip at least this number of broken messages from NATS per block", 0) \ + M(UInt64, nats_max_block_size, 0, "Number of row collected before flushing data from NATS.", 0) \ + M(Milliseconds, nats_flush_interval_ms, 0, "Timeout for flushing data from NATS.", 0) \ + M(String, nats_username, "", "NATS username", 0) \ + M(String, nats_password, "", "NATS password", 0) \ + M(String, nats_token, "", "NATS token", 0) + +#define LIST_OF_NATS_SETTINGS(M) \ + NATS_RELATED_SETTINGS(M) \ + FORMAT_FACTORY_SETTINGS(M) + +DECLARE_SETTINGS_TRAITS(NATSSettingsTraits, LIST_OF_NATS_SETTINGS) + +struct NATSSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; +} diff --git a/src/Storages/NATS/NATSSink.cpp b/src/Storages/NATS/NATSSink.cpp new file mode 100644 index 00000000000..44cf51072e6 --- /dev/null +++ 
b/src/Storages/NATS/NATSSink.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +NATSSink::NATSSink( + StorageNATS & storage_, + const StorageMetadataPtr & metadata_snapshot_, + ContextPtr context_, + ProducerBufferPtr buffer_) + : SinkToStorage(metadata_snapshot_->getSampleBlockNonMaterialized()) + , storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , context(context_) + , buffer(buffer_) +{ +} + + +void NATSSink::onStart() +{ + buffer->activateWriting(); + + auto format_settings = getFormatSettings(context); + format_settings.protobuf.allow_multiple_rows_without_delimiter = true; + + format = FormatFactory::instance().getOutputFormat(storage.getFormatName(), *buffer, getHeader(), context, + [this](const Columns & /* columns */, size_t /* rows */) + { + buffer->countRow(); + }, + format_settings); +} + + +void NATSSink::consume(Chunk chunk) +{ + format->write(getHeader().cloneWithColumns(chunk.detachColumns())); +} + + +void NATSSink::onFinish() +{ + format->finalize(); + + if (buffer) + buffer->updateMaxWait(); +} + +} diff --git a/src/Storages/NATS/NATSSink.h b/src/Storages/NATS/NATSSink.h new file mode 100644 index 00000000000..d94575de0e7 --- /dev/null +++ b/src/Storages/NATS/NATSSink.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class IOutputFormat; +using IOutputFormatPtr = std::shared_ptr; + +class NATSSink : public SinkToStorage +{ +public: + explicit NATSSink(StorageNATS & storage_, const StorageMetadataPtr & metadata_snapshot_, ContextPtr context_, ProducerBufferPtr buffer_); + + void onStart() override; + void consume(Chunk chunk) override; + void onFinish() override; + + String getName() const override { return "NATSSink"; } + +private: + StorageNATS & storage; + StorageMetadataPtr metadata_snapshot; + ContextPtr context; + ProducerBufferPtr buffer; + IOutputFormatPtr format; +}; +} diff --git a/src/Storages/NATS/NATSSource.cpp b/src/Storages/NATS/NATSSource.cpp new file mode 100644 index 00000000000..f5e5e4f8b91 --- /dev/null +++ b/src/Storages/NATS/NATSSource.cpp @@ -0,0 +1,135 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +static std::pair getHeaders(const StorageSnapshotPtr & storage_snapshot) +{ + auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized(); + auto virtual_header = storage_snapshot->getSampleBlockForColumns({"_subject"}); + + return {non_virtual_header, virtual_header}; +} + +static Block getSampleBlock(const Block & non_virtual_header, const Block & virtual_header) +{ + auto header = non_virtual_header; + for (const auto & column : virtual_header) + header.insert(column); + + return header; +} + +NATSSource::NATSSource( + StorageNATS & storage_, + const StorageSnapshotPtr & storage_snapshot_, + ContextPtr context_, + const Names & columns, + size_t max_block_size_) + : NATSSource(storage_, storage_snapshot_, getHeaders(storage_snapshot_), context_, columns, max_block_size_) +{ +} + +NATSSource::NATSSource( + StorageNATS & storage_, + const StorageSnapshotPtr & storage_snapshot_, + std::pair headers, + ContextPtr context_, + const Names & columns, + size_t max_block_size_) + : ISource(getSampleBlock(headers.first, headers.second)) + , storage(storage_) + , storage_snapshot(storage_snapshot_) + , context(context_) + , column_names(columns) + , max_block_size(max_block_size_) + , non_virtual_header(std::move(headers.first)) + , virtual_header(std::move(headers.second)) +{ + storage.incrementReader(); +} + + 
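+/// The reader count taken in the constructor is released in the destructor below;
+/// the consumer buffer itself is borrowed from the storage lazily in generate()
+/// and handed back here so that another source (or shutdown) can reuse it.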
+NATSSource::~NATSSource() +{ + storage.decrementReader(); + + if (!buffer) + return; + + buffer->allowNext(); + storage.pushReadBuffer(buffer); +} + +bool NATSSource::checkTimeLimit() const +{ + if (max_execution_time != 0) + { + auto elapsed_ns = total_stopwatch.elapsed(); + + if (elapsed_ns > static_cast(max_execution_time.totalMicroseconds()) * 1000) + return false; + } + + return true; +} + +Chunk NATSSource::generate() +{ + if (!buffer) + { + auto timeout = std::chrono::milliseconds(context->getSettingsRef().rabbitmq_max_wait_ms.totalMilliseconds()); + buffer = storage.popReadBuffer(timeout); + buffer->subscribe(); + } + + if (!buffer || is_finished) + return {}; + + is_finished = true; + + MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); + auto input_format + = FormatFactory::instance().getInputFormat(storage.getFormatName(), *buffer, non_virtual_header, context, max_block_size); + + StreamingFormatExecutor executor(non_virtual_header, input_format); + + size_t total_rows = 0; + + while (true) + { + if (buffer->eof()) + break; + + auto new_rows = executor.execute(); + + if (new_rows) + { + auto subject = buffer->getSubject(); + virtual_columns[0]->insertMany(subject, new_rows); + + total_rows = total_rows + new_rows; + } + + buffer->allowNext(); + + if (total_rows >= max_block_size || buffer->queueEmpty() || buffer->isConsumerStopped() || !checkTimeLimit()) + break; + } + + if (total_rows == 0) + return {}; + + auto result_columns = executor.getResultColumns(); + for (auto & column : virtual_columns) + result_columns.push_back(std::move(column)); + + return Chunk(std::move(result_columns), total_rows); +} + +} diff --git a/src/Storages/NATS/NATSSource.h b/src/Storages/NATS/NATSSource.h new file mode 100644 index 00000000000..e4e94d2347a --- /dev/null +++ b/src/Storages/NATS/NATSSource.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class NATSSource : public ISource +{ +public: + NATSSource( + StorageNATS & storage_, + const StorageSnapshotPtr & storage_snapshot_, + ContextPtr context_, + const Names & columns, + size_t max_block_size_); + + ~NATSSource() override; + + String getName() const override { return storage.getName(); } + ConsumerBufferPtr getBuffer() { return buffer; } + + Chunk generate() override; + + bool queueEmpty() const { return !buffer || buffer->queueEmpty(); } + + void setTimeLimit(Poco::Timespan max_execution_time_) { max_execution_time = max_execution_time_; } + +private: + bool checkTimeLimit() const; + + StorageNATS & storage; + StorageSnapshotPtr storage_snapshot; + ContextPtr context; + Names column_names; + const size_t max_block_size; + + bool is_finished = false; + const Block non_virtual_header; + const Block virtual_header; + + ConsumerBufferPtr buffer; + + Poco::Timespan max_execution_time = 0; + Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + + NATSSource( + StorageNATS & storage_, + const StorageSnapshotPtr & storage_snapshot_, + std::pair headers, + ContextPtr context_, + const Names & columns, + size_t max_block_size_); +}; + +} diff --git a/src/Storages/NATS/ReadBufferFromNATSConsumer.cpp b/src/Storages/NATS/ReadBufferFromNATSConsumer.cpp new file mode 100644 index 00000000000..fa6e60ac213 --- /dev/null +++ b/src/Storages/NATS/ReadBufferFromNATSConsumer.cpp @@ -0,0 +1,113 @@ +#include +#include +#include +#include +#include +#include +#include +#include "Poco/Timer.h" +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int 
CANNOT_CONNECT_NATS; +} + +ReadBufferFromNATSConsumer::ReadBufferFromNATSConsumer( + std::shared_ptr connection_, + StorageNATS & storage_, + std::vector & subjects_, + const String & subscribe_queue_name, + Poco::Logger * log_, + char row_delimiter_, + uint32_t queue_size_, + const std::atomic & stopped_) + : ReadBuffer(nullptr, 0) + , connection(connection_) + , storage(storage_) + , subjects(subjects_) + , log(log_) + , row_delimiter(row_delimiter_) + , stopped(stopped_) + , queue_name(subscribe_queue_name) + , received(queue_size_) +{ +} + +void ReadBufferFromNATSConsumer::subscribe() +{ + if (subscribed) + return; + + for (const auto & subject : subjects) + { + natsSubscription * ns; + auto status = natsConnection_QueueSubscribe( + &ns, connection->getConnection(), subject.c_str(), queue_name.c_str(), onMsg, static_cast(this)); + if (status == NATS_OK) + { + LOG_DEBUG(log, "Subscribed to subject {}", subject); + natsSubscription_SetPendingLimits(ns, -1, -1); + subscriptions.emplace_back(ns, &natsSubscription_Destroy); + } + else + { + throw Exception(ErrorCodes::CANNOT_CONNECT_NATS, "Failed to subscribe to subject {}", subject); + } + } + subscribed = true; +} + +void ReadBufferFromNATSConsumer::unsubscribe() +{ + for (const auto & subscription : subscriptions) + natsSubscription_Unsubscribe(subscription.get()); +} + +bool ReadBufferFromNATSConsumer::nextImpl() +{ + if (stopped || !allowed) + return false; + + if (received.tryPop(current)) + { + auto * new_position = const_cast(current.message.data()); + BufferBase::set(new_position, current.message.size(), 0); + allowed = false; + + return true; + } + + return false; +} + +void ReadBufferFromNATSConsumer::onMsg(natsConnection *, natsSubscription *, natsMsg * msg, void * consumer) +{ + auto * buffer = static_cast(consumer); + const int msg_length = natsMsg_GetDataLength(msg); + + if (msg_length) + { + String message_received = std::string(natsMsg_GetData(msg), msg_length); + String subject = natsMsg_GetSubject(msg); + if (buffer->row_delimiter != '\0') + message_received += buffer->row_delimiter; + + MessageData data = { + .message = message_received, + .subject = subject, + }; + if (!buffer->received.push(std::move(data))) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to received queue"); + + buffer->storage.startStreaming(); + } + + natsMsg_Destroy(msg); +} + +} diff --git a/src/Storages/NATS/ReadBufferFromNATSConsumer.h b/src/Storages/NATS/ReadBufferFromNATSConsumer.h new file mode 100644 index 00000000000..306c0aff3bf --- /dev/null +++ b/src/Storages/NATS/ReadBufferFromNATSConsumer.h @@ -0,0 +1,73 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace Poco +{ +class Logger; +} + +namespace DB +{ + +class ReadBufferFromNATSConsumer : public ReadBuffer +{ +public: + ReadBufferFromNATSConsumer( + std::shared_ptr connection_, + StorageNATS & storage_, + std::vector & subjects_, + const String & subscribe_queue_name, + Poco::Logger * log_, + char row_delimiter_, + uint32_t queue_size_, + const std::atomic & stopped_); + + struct MessageData + { + String message; + String subject; + }; + + void subscribe(); + void unsubscribe(); + + size_t subjectsCount() { return subjects.size(); } + + bool isConsumerStopped() { return stopped; } + + bool queueEmpty() { return received.empty(); } + size_t queueSize() { return received.size(); } + void allowNext() { allowed = true; } // Allow to read next message. 
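+    /// Message flow: the static onMsg() callback copies every delivery into the
+    /// bounded `received` queue; nextImpl() exposes one queued message to the reader
+    /// and stops yielding data until allowNext() re-opens the buffer.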
+ + auto getSubject() const { return current.subject; } + +private: + bool nextImpl() override; + + static void onMsg(natsConnection * nc, natsSubscription * sub, natsMsg * msg, void * consumer); + + std::shared_ptr connection; + StorageNATS & storage; + std::vector subscriptions; + std::vector subjects; + Poco::Logger * log; + char row_delimiter; + bool allowed = true; + const std::atomic & stopped; + + bool subscribed = false; + String queue_name; + + String channel_id; + ConcurrentBoundedQueue received; + MessageData current; +}; + +} diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp new file mode 100644 index 00000000000..3c1a04c7824 --- /dev/null +++ b/src/Storages/NATS/StorageNATS.cpp @@ -0,0 +1,730 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +static const uint32_t QUEUE_SIZE = 100000; +static const auto RESCHEDULE_MS = 500; +static const auto MAX_THREAD_WORK_DURATION_MS = 60000; + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int CANNOT_CONNECT_NATS; + extern const int QUERY_NOT_ALLOWED; +} + + +StorageNATS::StorageNATS( + const StorageID & table_id_, + ContextPtr context_, + const ColumnsDescription & columns_, + std::unique_ptr nats_settings_, + bool is_attach_) + : IStorage(table_id_) + , WithContext(context_->getGlobalContext()) + , nats_settings(std::move(nats_settings_)) + , subjects(parseList(getContext()->getMacros()->expand(nats_settings->nats_subjects), ',')) + , format_name(getContext()->getMacros()->expand(nats_settings->nats_format)) + , row_delimiter(nats_settings->nats_row_delimiter.value) + , schema_name(getContext()->getMacros()->expand(nats_settings->nats_schema)) + , num_consumers(nats_settings->nats_num_consumers.value) + , log(&Poco::Logger::get("StorageNATS (" + table_id_.table_name + ")")) + , semaphore(0, num_consumers) + , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) + , is_attach(is_attach_) +{ + auto nats_username = getContext()->getMacros()->expand(nats_settings->nats_username); + auto nats_password = getContext()->getMacros()->expand(nats_settings->nats_password); + auto nats_token = getContext()->getMacros()->expand(nats_settings->nats_token); + + configuration = + { + .url = getContext()->getMacros()->expand(nats_settings->nats_url), + .servers = parseList(getContext()->getMacros()->expand(nats_settings->nats_server_list), ','), + .username = nats_username.empty() ? getContext()->getConfigRef().getString("nats.user", "") : nats_username, + .password = nats_password.empty() ? getContext()->getConfigRef().getString("nats.password", "") : nats_password, + .token = nats_token.empty() ? 
getContext()->getConfigRef().getString("nats.token", "") : nats_token, + .max_reconnect = static_cast(nats_settings->nats_max_reconnect.value), + .reconnect_wait = static_cast(nats_settings->nats_reconnect_wait.value), + .secure = nats_settings->nats_secure.value + }; + + if (configuration.secure) + SSL_library_init(); + + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); + + nats_context = addSettings(getContext()); + nats_context->makeQueryContext(); + + try + { + connection = std::make_shared(configuration, log); + if (!connection->connect()) + throw Exception(ErrorCodes::CANNOT_CONNECT_NATS, "Cannot connect to {}. Nats last error: {}", + connection->connectionInfoForLog(), nats_GetLastError(nullptr)); + } + catch (...) + { + tryLogCurrentException(log); + if (!is_attach) + throw; + } + + /// One looping task for all consumers as they share the same connection == the same handler == the same event loop + looping_task = getContext()->getMessageBrokerSchedulePool().createTask("NATSLoopingTask", [this] { loopingFunc(); }); + looping_task->deactivate(); + + streaming_task = getContext()->getMessageBrokerSchedulePool().createTask("NATSStreamingTask", [this] { streamingToViewsFunc(); }); + streaming_task->deactivate(); + + connection_task = getContext()->getMessageBrokerSchedulePool().createTask("NATSConnectionManagerTask", [this] { connectionFunc(); }); + connection_task->deactivate(); +} + + +Names StorageNATS::parseList(const String & list, char delim) +{ + Names result; + if (list.empty()) + return result; + boost::split(result, list, [delim](char c) { return c == delim; }); + for (String & key : result) + boost::trim(key); + + return result; +} + + +String StorageNATS::getTableBasedName(String name, const StorageID & table_id) +{ + if (name.empty()) + return fmt::format("{}_{}", table_id.database_name, table_id.table_name); + else + return fmt::format("{}_{}_{}", name, table_id.database_name, table_id.table_name); +} + + +ContextMutablePtr StorageNATS::addSettings(ContextPtr local_context) const +{ + auto modified_context = Context::createCopy(local_context); + modified_context->setSetting("input_format_skip_unknown_fields", true); + modified_context->setSetting("input_format_allow_errors_ratio", 0.); + modified_context->setSetting("input_format_allow_errors_num", nats_settings->nats_skip_broken_messages.value); + + if (!schema_name.empty()) + modified_context->setSetting("format_schema", schema_name); + + for (const auto & setting : *nats_settings) + { + const auto & setting_name = setting.getName(); + + /// check for non-nats-related settings + if (!setting_name.starts_with("nats_")) + modified_context->setSetting(setting_name, setting.getValue()); + } + + return modified_context; +} + + +void StorageNATS::loopingFunc() +{ + connection->getHandler().startLoop(); + looping_task->activateAndSchedule(); +} + + +void StorageNATS::stopLoop() +{ + connection->getHandler().updateLoopState(Loop::STOP); +} + +void StorageNATS::stopLoopIfNoReaders() +{ + /// Stop the loop if no select was started. + /// There can be a case that selects are finished + /// but not all sources decremented the counter, then + /// it is ok that the loop is not stopped, because + /// there is a background task (streaming_task), which + /// also checks whether there is an idle loop. 
+ std::lock_guard lock(loop_mutex); + if (readers_count) + return; + connection->getHandler().updateLoopState(Loop::STOP); +} + +void StorageNATS::startLoop() +{ + connection->getHandler().updateLoopState(Loop::RUN); + looping_task->activateAndSchedule(); +} + + +void StorageNATS::incrementReader() +{ + ++readers_count; +} + + +void StorageNATS::decrementReader() +{ + --readers_count; +} + + +void StorageNATS::connectionFunc() +{ + if (consumers_ready) + return; + + bool needs_rescheduling = true; + if (connection->reconnect()) + needs_rescheduling &= !initBuffers(); + + if (needs_rescheduling) + connection_task->scheduleAfter(RESCHEDULE_MS); +} + +bool StorageNATS::initBuffers() +{ + size_t num_initialized = 0; + for (auto & buffer : buffers) + { + try + { + buffer->subscribe(); + ++num_initialized; + } + catch (...) + { + tryLogCurrentException(log); + break; + } + } + + startLoop(); + const bool are_buffers_initialized = num_initialized == num_created_consumers; + if (are_buffers_initialized) + consumers_ready.store(true); + return are_buffers_initialized; +} + + +/* Need to deactivate this way because otherwise might get a deadlock when first deactivate streaming task in shutdown and then + * inside streaming task try to deactivate any other task + */ +void StorageNATS::deactivateTask(BackgroundSchedulePool::TaskHolder & task, bool stop_loop) +{ + if (stop_loop) + stopLoop(); + + std::unique_lock lock(task_mutex, std::defer_lock); + lock.lock(); + task->deactivate(); +} + + +size_t StorageNATS::getMaxBlockSize() const +{ + return nats_settings->nats_max_block_size.changed ? nats_settings->nats_max_block_size.value + : (getContext()->getSettingsRef().max_insert_block_size.value / num_consumers); +} + + +void StorageNATS::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum /* processed_stage */, + size_t /* max_block_size */, + unsigned /* num_streams */) +{ + if (!consumers_ready) + throw Exception("NATS consumers setup not finished. Connection might be lost", ErrorCodes::CANNOT_CONNECT_NATS); + + if (num_created_consumers == 0) + return; + + if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) + throw Exception( + ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. 
To enable use setting `stream_like_engine_allow_direct_select`"); + + if (mv_attached) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageNATS with attached materialized views"); + + std::lock_guard lock(loop_mutex); + + auto sample_block = storage_snapshot->getSampleBlockForColumns(column_names); + auto modified_context = addSettings(local_context); + + if (!connection->isConnected()) + { + if (!connection->reconnect()) + throw Exception(ErrorCodes::CANNOT_CONNECT_NATS, "No connection to {}", connection->connectionInfoForLog()); + } + + Pipes pipes; + pipes.reserve(num_created_consumers); + + for (size_t i = 0; i < num_created_consumers; ++i) + { + auto nats_source = std::make_shared(*this, storage_snapshot, modified_context, column_names, 1); + + auto converting_dag = ActionsDAG::makeConvertingActions( + nats_source->getPort().getHeader().getColumnsWithTypeAndName(), + sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name); + + auto converting = std::make_shared(std::move(converting_dag)); + auto converting_transform = std::make_shared(nats_source->getPort().getHeader(), std::move(converting)); + + pipes.emplace_back(std::move(nats_source)); + pipes.back().addTransform(std::move(converting_transform)); + } + + if (!connection->getHandler().loopRunning() && connection->isConnected()) + startLoop(); + + LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); + auto pipe = Pipe::unitePipes(std::move(pipes)); + + if (pipe.empty()) + { + auto header = storage_snapshot->getSampleBlockForColumns(column_names); + InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, local_context); + } + else + { + auto read_step = std::make_unique(std::move(pipe), getName(), query_info.storage_limits); + query_plan.addStep(std::move(read_step)); + query_plan.addInterpreterContext(modified_context); + } +} + + +SinkToStoragePtr StorageNATS::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) +{ + auto modified_context = addSettings(local_context); + std::string subject = modified_context->getSettingsRef().stream_like_engine_insert_queue.changed + ? modified_context->getSettingsRef().stream_like_engine_insert_queue.value + : ""; + if (subject.empty()) + { + if (subjects.size() > 1) + { + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "This NATS engine reads from multiple subjects. You must specify `stream_like_engine_insert_queue` to choose the subject to write to"); + } + else + { + subject = subjects[0]; + } + } + + auto pos = subject.find('*'); + if (pos != std::string::npos || subject.back() == '>') + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Can not publish to wildcard subject"); + + if (!isSubjectInSubscriptions(subject)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Selected subject is not among engine subjects"); + + return std::make_shared(*this, metadata_snapshot, local_context, createWriteBuffer(subject)); +} + + +void StorageNATS::startup() +{ + for (size_t i = 0; i < num_consumers; ++i) + { + try + { + auto buffer = createReadBuffer(); + pushReadBuffer(std::move(buffer)); + ++num_created_consumers; + } + catch (...) 
+ { + if (!is_attach) + throw; + tryLogCurrentException(log); + } + } + + if (!connection->isConnected() || !initBuffers()) + connection_task->activateAndSchedule(); +} + + +void StorageNATS::shutdown() +{ + shutdown_called = true; + + /// In case it has not yet been able to setup connection; + deactivateTask(connection_task, false); + + /// The order of deactivating tasks is important: wait for streamingToViews() func to finish and + /// then wait for background event loop to finish. + deactivateTask(streaming_task, false); + deactivateTask(looping_task, true); + + /// Just a paranoid try catch, it is not actually needed. + try + { + if (drop_table) + { + for (auto & buffer : buffers) + buffer->unsubscribe(); + } + + connection->disconnect(); + + for (size_t i = 0; i < num_created_consumers; ++i) + popReadBuffer(); + } + catch (...) + { + tryLogCurrentException(log); + } +} + +void StorageNATS::pushReadBuffer(ConsumerBufferPtr buffer) +{ + std::lock_guard lock(buffers_mutex); + buffers.push_back(buffer); + semaphore.set(); +} + + +ConsumerBufferPtr StorageNATS::popReadBuffer() +{ + return popReadBuffer(std::chrono::milliseconds::zero()); +} + + +ConsumerBufferPtr StorageNATS::popReadBuffer(std::chrono::milliseconds timeout) +{ + // Wait for the first free buffer + if (timeout == std::chrono::milliseconds::zero()) + semaphore.wait(); + else + { + if (!semaphore.tryWait(timeout.count())) + return nullptr; + } + + // Take the first available buffer from the list + std::lock_guard lock(buffers_mutex); + auto buffer = buffers.back(); + buffers.pop_back(); + + return buffer; +} + + +ConsumerBufferPtr StorageNATS::createReadBuffer() +{ + return std::make_shared( + connection, *this, subjects, + nats_settings->nats_queue_group.changed ? nats_settings->nats_queue_group.value : getStorageID().getFullTableName(), + log, row_delimiter, queue_size, shutdown_called); +} + + +ProducerBufferPtr StorageNATS::createWriteBuffer(const std::string & subject) +{ + return std::make_shared( + configuration, getContext(), subject, shutdown_called, log, + row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); +} + +bool StorageNATS::isSubjectInSubscriptions(const std::string & subject) +{ + auto subject_levels = parseList(subject, '.'); + + for (const auto & nats_subject : subjects) + { + auto nats_subject_levels = parseList(nats_subject, '.'); + size_t levels_to_check = 0; + if (!nats_subject_levels.empty() && nats_subject_levels.back() == ">") + levels_to_check = nats_subject_levels.size() - 1; + if (levels_to_check) + { + if (subject_levels.size() < levels_to_check) + continue; + } + else + { + if (subject_levels.size() != nats_subject_levels.size()) + continue; + levels_to_check = nats_subject_levels.size(); + } + + bool is_same = true; + for (size_t i = 0; i < levels_to_check; ++i) + { + if (nats_subject_levels[i] == "*") + continue; + + if (subject_levels[i] != nats_subject_levels[i]) + { + is_same = false; + break; + } + } + if (is_same) + return true; + } + + return false; +} + + +bool StorageNATS::checkDependencies(const StorageID & table_id) +{ + // Check if all dependencies are attached + auto dependencies = DatabaseCatalog::instance().getDependencies(table_id); + if (dependencies.empty()) + return true; + + // Check the dependencies are ready? 
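+    // A dependency counts as ready when its table can be resolved and, for a
+    // materialized view, the target table exists; the check then recurses into
+    // that table's own dependencies.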
+ for (const auto & db_tab : dependencies) + { + auto table = DatabaseCatalog::instance().tryGetTable(db_tab, getContext()); + if (!table) + return false; + + // If it materialized view, check it's target table + auto * materialized_view = dynamic_cast(table.get()); + if (materialized_view && !materialized_view->tryGetTargetTable()) + return false; + + // Check all its dependencies + if (!checkDependencies(db_tab)) + return false; + } + + return true; +} + + +void StorageNATS::streamingToViewsFunc() +{ + bool do_reschedule = true; + try + { + auto table_id = getStorageID(); + + // Check if at least one direct dependency is attached + size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size(); + bool nats_connected = connection->isConnected() || connection->reconnect(); + + if (dependencies_count && nats_connected) + { + auto start_time = std::chrono::steady_clock::now(); + + mv_attached.store(true); + + // Keep streaming as long as there are attached views and streaming is not cancelled + while (!shutdown_called && num_created_consumers > 0) + { + if (!checkDependencies(table_id)) + break; + + LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); + + if (streamToViews()) + { + /// Reschedule with backoff. + do_reschedule = false; + break; + } + + auto end_time = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(end_time - start_time); + if (duration.count() > MAX_THREAD_WORK_DURATION_MS) + { + LOG_TRACE(log, "Reschedule streaming. Thread work duration limit exceeded."); + break; + } + } + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + mv_attached.store(false); + + if (!shutdown_called && do_reschedule) + streaming_task->scheduleAfter(RESCHEDULE_MS); +} + + +bool StorageNATS::streamToViews() +{ + auto table_id = getStorageID(); + auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); + if (!table) + throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = table_id; + + // Only insert into dependent views and expect that input blocks contain virtual columns + InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); + auto block_io = interpreter.execute(); + + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); + auto column_names = block_io.pipeline.getHeader().getNames(); + auto sample_block = storage_snapshot->getSampleBlockForColumns(column_names); + + auto block_size = getMaxBlockSize(); + + // Create a stream for each consumer and join them in a union stream + std::vector> sources; + Pipes pipes; + sources.reserve(num_created_consumers); + pipes.reserve(num_created_consumers); + + for (size_t i = 0; i < num_created_consumers; ++i) + { + LOG_DEBUG(log, "Current queue size: {}", buffers[0]->queueSize()); + auto source = std::make_shared(*this, storage_snapshot, nats_context, column_names, block_size); + sources.emplace_back(source); + pipes.emplace_back(source); + + Poco::Timespan max_execution_time = nats_settings->nats_flush_interval_ms.changed + ? 
nats_settings->nats_flush_interval_ms + : getContext()->getSettingsRef().stream_flush_interval_ms; + + source->setTimeLimit(max_execution_time); + } + + block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); + + if (!connection->getHandler().loopRunning()) + startLoop(); + + { + CompletedPipelineExecutor executor(block_io.pipeline); + executor.execute(); + } + + size_t queue_empty = 0; + + if (!connection->isConnected()) + { + if (shutdown_called) + return true; + + if (connection->reconnect()) + { + LOG_DEBUG(log, "Connection restored"); + } + else + { + LOG_TRACE(log, "Reschedule streaming. Unable to restore connection."); + return true; + } + } + else + { + for (auto & source : sources) + { + if (source->queueEmpty()) + ++queue_empty; + + connection->getHandler().iterateLoop(); + } + } + + if (queue_empty == num_created_consumers) + { + LOG_TRACE(log, "Reschedule streaming. Queues are empty."); + return true; + } + else + { + startLoop(); + } + + /// Do not reschedule, do not stop event loop. + return false; +} + + +void registerStorageNATS(StorageFactory & factory) +{ + auto creator_fn = [](const StorageFactory::Arguments & args) + { + auto nats_settings = std::make_unique(); + bool with_named_collection = getExternalDataSourceConfiguration(args.engine_args, *nats_settings, args.getLocalContext()); + if (!with_named_collection && !args.storage_def->settings) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "NATS engine must have settings"); + + nats_settings->loadFromQuery(*args.storage_def); + + if (!nats_settings->nats_url.changed && !nats_settings->nats_server_list.changed) + throw Exception( + "You must specify either `nats_url` or `nats_server_list` settings", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!nats_settings->nats_format.changed) + throw Exception("You must specify `nats_format` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!nats_settings->nats_subjects.changed) + throw Exception("You must specify `nats_subjects` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(nats_settings), args.attach); + }; + + factory.registerStorage("NATS", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); +} + + +NamesAndTypesList StorageNATS::getVirtuals() const +{ + return NamesAndTypesList{ + {"_subject", std::make_shared()} + }; +} + +} diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h new file mode 100644 index 00000000000..185b39250c8 --- /dev/null +++ b/src/Storages/NATS/StorageNATS.h @@ -0,0 +1,145 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class StorageNATS final : public IStorage, WithContext +{ +public: + StorageNATS( + const StorageID & table_id_, + ContextPtr context_, + const ColumnsDescription & columns_, + std::unique_ptr nats_settings_, + bool is_attach_); + + std::string getName() const override { return "NATS"; } + + bool noPushingToViews() const override { return true; } + + void startup() override; + void shutdown() override; + + /// This is a bad way to let storage know in shutdown() that table is going to be dropped. There are some actions which need + /// to be done only when table is dropped (not when detached). Also connection must be closed only in shutdown, but those + /// actions require an open connection. 
Therefore there needs to be a way inside shutdown() method to know whether it is called + /// because of drop query. And drop() method is not suitable at all, because it will not only require to reopen connection, but also + /// it can be called considerable time after table is dropped (for example, in case of Atomic database), which is not appropriate for the case. + void checkTableCanBeDropped() const override { drop_table = true; } + + /// Always return virtual columns in addition to required columns + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum /* processed_stage */, + size_t /* max_block_size */, + unsigned /* num_streams */) override; + + SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; + + void pushReadBuffer(ConsumerBufferPtr buf); + ConsumerBufferPtr popReadBuffer(); + ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout); + + const String & getFormatName() const { return format_name; } + NamesAndTypesList getVirtuals() const override; + + void incrementReader(); + void decrementReader(); + + void startStreaming() { if (!mv_attached) { streaming_task->activateAndSchedule(); } } + +private: + ContextMutablePtr nats_context; + std::unique_ptr nats_settings; + std::vector subjects; + + const String format_name; + char row_delimiter; + const String schema_name; + size_t num_consumers; + + Poco::Logger * log; + + NATSConnectionManagerPtr connection; /// Connection for all consumers + NATSConfiguration configuration; + + size_t num_created_consumers = 0; + Poco::Semaphore semaphore; + std::mutex buffers_mutex; + std::vector buffers; /// available buffers for NATS consumers + + /// maximum number of messages in NATS queue (x-max-length). Also used + /// to setup size of inner buffer for received messages + uint32_t queue_size; + + std::once_flag flag; /// remove exchange only once + std::mutex task_mutex; + BackgroundSchedulePool::TaskHolder streaming_task; + BackgroundSchedulePool::TaskHolder looping_task; + BackgroundSchedulePool::TaskHolder connection_task; + + /// True if consumers have subscribed to all subjects + std::atomic consumers_ready{false}; + /// Needed for tell MV or producer background tasks + /// that they must finish as soon as possible. + std::atomic shutdown_called{false}; + /// For select query we must be aware of the end of streaming + /// to be able to turn off the loop. + std::atomic readers_count = 0; + std::atomic mv_attached = false; + + /// In select query we start event loop, but do not stop it + /// after that select is finished. Then in a thread, which + /// checks for MV we also check if we have select readers. + /// If not - we turn off the loop. The checks are done under + /// mutex to avoid having a turned off loop when select was + /// started. 
+ std::mutex loop_mutex; + + mutable bool drop_table = false; + bool is_attach; + + ConsumerBufferPtr createReadBuffer(); + ProducerBufferPtr createWriteBuffer(const std::string & subject); + + bool isSubjectInSubscriptions(const std::string & subject); + + + /// Functions working in the background + void streamingToViewsFunc(); + void loopingFunc(); + void connectionFunc(); + + bool initBuffers(); + + void startLoop(); + void stopLoop(); + void stopLoopIfNoReaders(); + + static Names parseList(const String & list, char delim); + static String getTableBasedName(String name, const StorageID & table_id); + + ContextMutablePtr addSettings(ContextPtr context) const; + size_t getMaxBlockSize() const; + void deactivateTask(BackgroundSchedulePool::TaskHolder & task, bool stop_loop); + + bool streamToViews(); + bool checkDependencies(const StorageID & table_id); +}; + +} diff --git a/src/Storages/NATS/WriteBufferToNATSProducer.cpp b/src/Storages/NATS/WriteBufferToNATSProducer.cpp new file mode 100644 index 00000000000..af76247d903 --- /dev/null +++ b/src/Storages/NATS/WriteBufferToNATSProducer.cpp @@ -0,0 +1,183 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +static const auto BATCH = 1000; +static const auto MAX_BUFFERED = 131072; + +namespace ErrorCodes +{ + extern const int CANNOT_CONNECT_NATS; + extern const int LOGICAL_ERROR; +} + +WriteBufferToNATSProducer::WriteBufferToNATSProducer( + const NATSConfiguration & configuration_, + ContextPtr global_context, + const String & subject_, + std::atomic & shutdown_called_, + Poco::Logger * log_, + std::optional delimiter, + size_t rows_per_message, + size_t chunk_size_) + : WriteBuffer(nullptr, 0) + , connection(configuration_, log_) + , subject(subject_) + , shutdown_called(shutdown_called_) + , payloads(BATCH) + , log(log_) + , delim(delimiter) + , max_rows(rows_per_message) + , chunk_size(chunk_size_) +{ + if (!connection.connect()) + throw Exception(ErrorCodes::CANNOT_CONNECT_NATS, "Cannot connect to NATS {}", connection.connectionInfoForLog()); + + writing_task = global_context->getSchedulePool().createTask("NATSWritingTask", [this] { writingFunc(); }); + writing_task->deactivate(); + + reinitializeChunks(); +} + + +WriteBufferToNATSProducer::~WriteBufferToNATSProducer() +{ + writing_task->deactivate(); + assert(rows == 0); +} + + +void WriteBufferToNATSProducer::countRow() +{ + if (++rows % max_rows == 0) + { + const std::string & last_chunk = chunks.back(); + size_t last_chunk_size = offset(); + + if (last_chunk_size && delim && last_chunk[last_chunk_size - 1] == delim) + --last_chunk_size; + + std::string payload; + payload.reserve((chunks.size() - 1) * chunk_size + last_chunk_size); + + for (auto i = chunks.begin(), end = --chunks.end(); i != end; ++i) + payload.append(*i); + + payload.append(last_chunk, 0, last_chunk_size); + + reinitializeChunks(); + + if (!payloads.push(payload)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to payloads queue"); + } +} + +void WriteBufferToNATSProducer::publish() +{ + uv_thread_t flush_thread; + + uv_thread_create(&flush_thread, publishThreadFunc, static_cast(this)); + + connection.getHandler().startLoop(); + uv_thread_join(&flush_thread); +} + +void WriteBufferToNATSProducer::publishThreadFunc(void * arg) +{ + WriteBufferToNATSProducer * buffer = static_cast(arg); + String payload; + + natsStatus status; + while (!buffer->payloads.empty()) + { + if (natsConnection_Buffered(buffer->connection.getConnection()) 
> MAX_BUFFERED) + break; + bool pop_result = buffer->payloads.pop(payload); + + if (!pop_result) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not pop payload"); + status = natsConnection_PublishString(buffer->connection.getConnection(), buffer->subject.c_str(), payload.c_str()); + + if (status != NATS_OK) + { + LOG_DEBUG(buffer->log, "Something went wrong during publishing to NATS subject. Nats status text: {}. Last error message: {}", + natsStatus_GetText(status), nats_GetLastError(nullptr)); + if (!buffer->payloads.push(std::move(payload))) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to payloads queue"); + break; + } + } + + nats_ReleaseThreadMemory(); +} + + +void WriteBufferToNATSProducer::writingFunc() +{ + try + { + while ((!payloads.empty() || wait_all) && !shutdown_called.load()) + { + publish(); + + LOG_DEBUG( + log, "Writing func {} {} {}", wait_payloads.load(), payloads.empty(), natsConnection_Buffered(connection.getConnection())); + if (wait_payloads.load() && payloads.empty() && natsConnection_Buffered(connection.getConnection()) == 0) + wait_all = false; + + if (!connection.isConnected() && wait_all) + connection.reconnect(); + + iterateEventLoop(); + } + } + catch (...) + { + tryLogCurrentException(log); + } + + LOG_DEBUG(log, "Producer on subject {} completed", subject); +} + + +void WriteBufferToNATSProducer::nextImpl() +{ + addChunk(); +} + +void WriteBufferToNATSProducer::addChunk() +{ + chunks.push_back(std::string()); + chunks.back().resize(chunk_size); + set(chunks.back().data(), chunk_size); +} + +void WriteBufferToNATSProducer::reinitializeChunks() +{ + rows = 0; + chunks.clear(); + /// We cannot leave the buffer in the undefined state (i.e. without any + /// underlying buffer), since in this case the WriteBuffeR::next() will + /// not call our nextImpl() (due to available() == 0) + addChunk(); +} + + +void WriteBufferToNATSProducer::iterateEventLoop() +{ + connection.getHandler().iterateLoop(); +} + +} diff --git a/src/Storages/NATS/WriteBufferToNATSProducer.h b/src/Storages/NATS/WriteBufferToNATSProducer.h new file mode 100644 index 00000000000..484d80598db --- /dev/null +++ b/src/Storages/NATS/WriteBufferToNATSProducer.h @@ -0,0 +1,81 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class WriteBufferToNATSProducer : public WriteBuffer +{ +public: + WriteBufferToNATSProducer( + const NATSConfiguration & configuration_, + ContextPtr global_context, + const String & subject_, + std::atomic & shutdown_called_, + Poco::Logger * log_, + std::optional delimiter, + size_t rows_per_message, + size_t chunk_size_); + + ~WriteBufferToNATSProducer() override; + + void countRow(); + void activateWriting() { writing_task->activateAndSchedule(); } + void updateMaxWait() { wait_payloads.store(true); } + +private: + void nextImpl() override; + void addChunk(); + void reinitializeChunks(); + + void iterateEventLoop(); + void writingFunc(); + void publish(); + + static void publishThreadFunc(void * arg); + + NATSConnectionManager connection; + const String subject; + + /* false: when shutdown is called + * true: in all other cases + */ + std::atomic & shutdown_called; + + BackgroundSchedulePool::TaskHolder writing_task; + + /* payloads.queue: + * - payloads are pushed to queue in countRow and popped by another thread in writingFunc, each payload gets into queue only once + */ + ConcurrentBoundedQueue payloads; + + /* false: message delivery successfully ended: publisher received 
confirm from server that all published + * 1) persistent messages were written to disk + * 2) non-persistent messages reached the queue + * true: continue to process deliveries and returned messages + */ + bool wait_all = true; + + /* false: until writeSuffix is called + * true: means payloads.queue will not grow anymore + */ + std::atomic wait_payloads = false; + + Poco::Logger * log; + const std::optional delim; + const size_t max_rows; + const size_t chunk_size; + size_t rows = 0; + std::list chunks; +}; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQConnection.h b/src/Storages/RabbitMQ/RabbitMQConnection.h index acc3c48f85b..7a355afea0e 100644 --- a/src/Storages/RabbitMQ/RabbitMQConnection.h +++ b/src/Storages/RabbitMQ/RabbitMQConnection.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 58f08c48c68..73f0c8bd44e 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -93,18 +93,40 @@ StorageRabbitMQ::StorageRabbitMQ( , milliseconds_to_wait(RESCHEDULE_MS) , is_attach(is_attach_) { - auto parsed_address = parseAddress(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_host_port), 5672); - context_->getRemoteHostFilter().checkHostAndPort(parsed_address.first, toString(parsed_address.second)); + const auto & config = getContext()->getConfigRef(); + + std::pair parsed_address; + auto setting_rabbitmq_username = rabbitmq_settings->rabbitmq_username.value; + auto setting_rabbitmq_password = rabbitmq_settings->rabbitmq_password.value; + String username, password; + + if (rabbitmq_settings->rabbitmq_host_port.changed) + { + username = setting_rabbitmq_username.empty() ? config.getString("rabbitmq.username", "") : setting_rabbitmq_username; + password = setting_rabbitmq_password.empty() ? config.getString("rabbitmq.password", "") : setting_rabbitmq_password; + if (username.empty() || password.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "No username or password. They can be specified either in config or in storage settings"); + + parsed_address = parseAddress(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_host_port), 5672); + if (parsed_address.first.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Host or port is incorrect (host: {}, port: {})", parsed_address.first, parsed_address.second); + + context_->getRemoteHostFilter().checkHostAndPort(parsed_address.first, toString(parsed_address.second)); + } + else if (!rabbitmq_settings->rabbitmq_address.changed) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "RabbitMQ requires either `rabbitmq_host_port` or `rabbitmq_address` setting"); - auto rabbitmq_username = rabbitmq_settings->rabbitmq_username.value; - auto rabbitmq_password = rabbitmq_settings->rabbitmq_password.value; configuration = { .host = parsed_address.first, .port = parsed_address.second, - .username = rabbitmq_username.empty() ? getContext()->getConfigRef().getString("rabbitmq.username") : rabbitmq_username, - .password = rabbitmq_password.empty() ? 
getContext()->getConfigRef().getString("rabbitmq.password") : rabbitmq_password, - .vhost = getContext()->getConfigRef().getString("rabbitmq.vhost", getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_vhost)), + .username = username, + .password = password, + .vhost = config.getString("rabbitmq.vhost", getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_vhost)), .secure = rabbitmq_settings->rabbitmq_secure.value, .connection_string = getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_address) }; @@ -1064,9 +1086,6 @@ bool StorageRabbitMQ::streamToViews() sources.emplace_back(source); pipes.emplace_back(source); - // Limit read batch to maximum block size to allow DDL - StreamLocalLimits limits; - Poco::Timespan max_execution_time = rabbitmq_settings->rabbitmq_flush_interval_ms.changed ? rabbitmq_settings->rabbitmq_flush_interval_ms : getContext()->getSettingsRef().stream_flush_interval_ms; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 4c962f36e4f..85fb20d6571 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1079,8 +1080,8 @@ void registerStorageBuffer(StorageFactory & factory) size_t i = 0; - String destination_database = engine_args[i++]->as().value.safeGet(); - String destination_table = engine_args[i++]->as().value.safeGet(); + String destination_database = checkAndGetLiteralArgument(engine_args[i++], "destination_database"); + String destination_table = checkAndGetLiteralArgument(engine_args[i++], "destination_table"); UInt64 num_buckets = applyVisitor(FieldVisitorConvertToNumber(), engine_args[i++]->as().value); diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index fda6da6c1ff..2839ac03a5b 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB @@ -339,7 +340,7 @@ void registerStorageDictionary(StorageFactory & factory) ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); args.engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[0], local_context); - String dictionary_name = args.engine_args[0]->as().value.safeGet(); + String dictionary_name = checkAndGetLiteralArgument(args.engine_args[0], "dictionary_name"); if (!args.attach) { diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1c785df9be4..03eb400a8ad 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -34,10 +35,6 @@ #include #include #include -#include -#include -#include -#include #include #include @@ -1437,15 +1434,15 @@ void registerStorageDistributed(StorageFactory & factory) engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], local_context); engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], local_context); - String remote_database = engine_args[1]->as().value.safeGet(); - String remote_table = engine_args[2]->as().value.safeGet(); + String remote_database = checkAndGetLiteralArgument(engine_args[1], "remote_database"); + String remote_table = checkAndGetLiteralArgument(engine_args[2], "remote_table"); const auto & sharding_key = engine_args.size() >= 4 ? 
engine_args[3] : nullptr; String storage_policy = "default"; if (engine_args.size() >= 5) { engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], local_context); - storage_policy = engine_args[4]->as().value.safeGet(); + storage_policy = checkAndGetLiteralArgument(engine_args[4], "storage_policy"); } /// Check that sharding_key exists in the table and has numeric type. diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index e0cbdbe98af..2931e62b7ef 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -22,6 +22,7 @@ #include #include #include +#include namespace DB @@ -179,14 +180,14 @@ void registerStorageExecutable(StorageFactory & factory) for (size_t i = 0; i < 2; ++i) args.engine_args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[i], local_context); - auto scipt_name_with_arguments_value = args.engine_args[0]->as().value.safeGet(); + auto script_name_with_arguments_value = checkAndGetLiteralArgument(args.engine_args[0], "script_name_with_arguments_value"); std::vector script_name_with_arguments; - boost::split(script_name_with_arguments, scipt_name_with_arguments_value, [](char c) { return c == ' '; }); + boost::split(script_name_with_arguments, script_name_with_arguments_value, [](char c) { return c == ' '; }); auto script_name = script_name_with_arguments[0]; script_name_with_arguments.erase(script_name_with_arguments.begin()); - auto format = args.engine_args[1]->as().value.safeGet(); + auto format = checkAndGetLiteralArgument(args.engine_args[1], "format"); std::vector input_queries; for (size_t i = 2; i < args.engine_args.size(); ++i) diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index 181cf0ca183..dcb7a90b2f6 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -3,13 +3,9 @@ #include #include -#include -#include #include -#include #include #include -#include #include #include #include @@ -17,6 +13,7 @@ #include #include #include +#include #include #include @@ -95,10 +92,13 @@ StorageExternalDistributed::StorageExternalDistributed( postgres_conf.set(configuration); postgres_conf.addresses = addresses; + const auto & settings = context->getSettingsRef(); auto pool = std::make_shared( postgres_conf, - context->getSettingsRef().postgresql_connection_pool_size, - context->getSettingsRef().postgresql_connection_pool_wait_timeout); + settings.postgresql_connection_pool_size, + settings.postgresql_connection_pool_wait_timeout, + POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, + settings.postgresql_connection_pool_auto_close_connection); shard = std::make_shared(table_id_, std::move(pool), configuration.table, columns_, constraints_, String{}); break; @@ -229,7 +229,7 @@ void registerStorageExternalDistributed(StorageFactory & factory) if (engine_args.size() < 2) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine ExternalDistributed must have at least 2 arguments: engine_name, named_collection and/or description"); - auto engine_name = engine_args[0]->as().value.safeGet(); + auto engine_name = checkAndGetLiteralArgument(engine_args[0], "engine_name"); StorageExternalDistributed::ExternalStorageEngine table_engine; if (engine_name == "URL") table_engine = StorageExternalDistributed::ExternalStorageEngine::URL; @@ -256,7 +256,7 @@ void registerStorageExternalDistributed(StorageFactory & factory) for (const auto & [name, value] : storage_specific_args) { 
if (name == "description") - cluster_description = value->as()->value.safeGet(); + cluster_description = checkAndGetLiteralArgument(value, "cluster_description"); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown key-value argument {} for table engine URL", name); @@ -271,11 +271,11 @@ void registerStorageExternalDistributed(StorageFactory & factory) for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); - cluster_description = engine_args[1]->as().value.safeGet(); - configuration.format = engine_args[2]->as().value.safeGet(); + cluster_description = checkAndGetLiteralArgument(engine_args[1], "cluster_description"); + configuration.format = checkAndGetLiteralArgument(engine_args[2], "format"); configuration.compression_method = "auto"; if (engine_args.size() == 4) - configuration.compression_method = engine_args[3]->as().value.safeGet(); + configuration.compression_method = checkAndGetLiteralArgument(engine_args[3], "compression_method"); } @@ -302,7 +302,7 @@ void registerStorageExternalDistributed(StorageFactory & factory) for (const auto & [name, value] : storage_specific_args) { if (name == "description") - cluster_description = value->as()->value.safeGet(); + cluster_description = checkAndGetLiteralArgument(value, "cluster_description"); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown key-value argument {} for table function URL", name); @@ -320,11 +320,11 @@ void registerStorageExternalDistributed(StorageFactory & factory) "ExternalDistributed('engine_name', 'cluster_description', 'database', 'table', 'user', 'password').", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - cluster_description = engine_args[1]->as().value.safeGet(); - configuration.database = engine_args[2]->as().value.safeGet(); - configuration.table = engine_args[3]->as().value.safeGet(); - configuration.username = engine_args[4]->as().value.safeGet(); - configuration.password = engine_args[5]->as().value.safeGet(); + cluster_description = checkAndGetLiteralArgument(engine_args[1], "cluster_description"); + configuration.database = checkAndGetLiteralArgument(engine_args[2], "database"); + configuration.table = checkAndGetLiteralArgument(engine_args[3], "table"); + configuration.username = checkAndGetLiteralArgument(engine_args[4], "username"); + configuration.password = checkAndGetLiteralArgument(engine_args[5], "password"); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 2fa6003c0eb..d138104018a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1,5 +1,10 @@ #include #include +#include +#include +#include +#include +#include #include #include @@ -20,30 +25,26 @@ #include #include #include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include #include #include #include #include #include +#include + +#include +#include +#include +#include + #include #include -#include + +#include +#include +#include +#include +#include namespace fs = std::filesystem; @@ -1103,7 +1104,7 @@ void registerStorageFile(StorageFactory & factory) ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); engine_args_ast[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args_ast[0], factory_args.getLocalContext()); - storage_args.format_name = engine_args_ast[0]->as().value.safeGet(); + storage_args.format_name = checkAndGetLiteralArgument(engine_args_ast[0], "format_name"); // Use format settings from 
global server context + settings from // the SETTINGS clause of the create query. Settings from current @@ -1171,7 +1172,7 @@ void registerStorageFile(StorageFactory & factory) if (engine_args_ast.size() == 3) { engine_args_ast[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args_ast[2], factory_args.getLocalContext()); - storage_args.compression_method = engine_args_ast[2]->as().value.safeGet(); + storage_args.compression_method = checkAndGetLiteralArgument(engine_args_ast[2], "compression_method"); } else storage_args.compression_method = "auto"; diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index fa0baea40cd..d875b4ee80c 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -12,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -469,16 +469,16 @@ void registerStorageGenerateRandom(StorageFactory & factory) if (!engine_args.empty()) { - const Field & value = engine_args[0]->as().value; - if (!value.isNull()) - random_seed = value.safeGet(); + const auto & ast_literal = engine_args[0]->as(); + if (!ast_literal.value.isNull()) + random_seed = checkAndGetLiteralArgument(ast_literal, "random_seed"); } if (engine_args.size() >= 2) - max_string_length = engine_args[1]->as().value.safeGet(); + max_string_length = checkAndGetLiteralArgument(engine_args[1], "max_string_length"); if (engine_args.size() == 3) - max_array_length = engine_args[2]->as().value.safeGet(); + max_array_length = checkAndGetLiteralArgument(engine_args[2], "max_array_length"); return std::make_shared(args.table_id, args.columns, args.comment, max_array_length, max_string_length, random_seed); }); diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 1324ebf5b28..ac6ead54016 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -51,6 +51,7 @@ namespace ErrorCodes extern const int SIZES_OF_MARKS_FILES_ARE_INCONSISTENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_FILE_NAME; + extern const int CANNOT_RESTORE_TABLE; } /// NOTE: The lock `StorageLog::rwlock` is NOT kept locked while reading, @@ -921,11 +922,8 @@ std::optional StorageLog::totalBytes(const Settings &) const return total_bytes; } -void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) +void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - auto lock_timeout = getLockTimeout(backup_entries_collector.getContext()); loadMarks(lock_timeout); @@ -986,16 +984,16 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c } } -void StorageLog::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) +void StorageLog::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); + auto backup = restorer.getBackup(); + if (!backup->hasFiles(data_path_in_backup)) + return; if (!num_data_files) return; - auto backup = restorer.getBackup(); - if 
(!restorer.isNonEmptyTableAllowed() && total_bytes && backup->hasFiles(data_path_in_backup)) + if (!restorer.isNonEmptyTableAllowed() && total_bytes) RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); auto lock_timeout = getLockTimeout(restorer.getContext()); @@ -1024,6 +1022,11 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p for (const auto & data_file : data_files) { String file_path_in_backup = data_path_in_backup_fs / fileName(data_file.path); + if (!backup->fileExists(file_path_in_backup)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), file_path_in_backup); + } auto backup_entry = backup->readFile(file_path_in_backup); auto in = backup_entry->getReadBuffer(); auto out = disk->writeFile(data_file.path, max_compress_block_size, WriteMode::Append); @@ -1035,6 +1038,11 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p /// Append marks. size_t num_extra_marks = 0; String file_path_in_backup = data_path_in_backup_fs / fileName(marks_file_path); + if (!backup->fileExists(file_path_in_backup)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), file_path_in_backup); + } size_t file_size = backup->getFileSize(file_path_in_backup); if (file_size % (num_data_files * sizeof(Mark)) != 0) throw Exception("Size of marks file is inconsistent", ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 2ece0af3359..b01415f9590 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -421,6 +421,13 @@ void StorageMaterializedView::restoreDataFromBackup(RestorerFromBackup & restore return getTargetTable()->restoreDataFromBackup(restorer, data_path_in_backup, partitions); } +bool StorageMaterializedView::supportsBackupPartition() const +{ + if (hasInnerTable()) + return getTargetTable()->supportsBackupPartition(); + return false; +} + std::optional StorageMaterializedView::totalRows(const Settings & settings) const { if (hasInnerTable()) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 0adf394876c..1d8808b302e 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -97,6 +97,7 @@ public: void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; + bool supportsBackupPartition() const override; std::optional totalRows(const Settings & settings) const override; std::optional totalBytes(const Settings & settings) const override; diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 5de8c3bda43..7baecaa594f 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -38,6 +38,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int CANNOT_RESTORE_TABLE; } @@ -479,24 +480,21 @@ namespace }; } -void StorageMemory::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) +void 
StorageMemory::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - auto max_compress_block_size = backup_entries_collector.getContext()->getSettingsRef().max_compress_block_size; backup_entries_collector.addBackupEntries( std::make_shared(getInMemoryMetadataPtr(), data.get(), data_path_in_backup, max_compress_block_size) ->getBackupEntries()); } -void StorageMemory::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) +void StorageMemory::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto backup = restorer.getBackup(); - if (!restorer.isNonEmptyTableAllowed() && total_size_bytes && backup->hasFiles(data_path_in_backup)) + if (!backup->hasFiles(data_path_in_backup)) + return; + + if (!restorer.isNonEmptyTableAllowed() && total_size_bytes) RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); restorer.addDataRestoreTask( @@ -514,6 +512,11 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat IndexForNativeFormat index; { String index_file_path = data_path_in_backup_fs / "index.mrk"; + if (!backup->fileExists(index_file_path)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), index_file_path); + } auto backup_entry = backup->readFile(index_file_path); auto in = backup_entry->getReadBuffer(); CompressedReadBuffer compressed_in{*in}; @@ -526,6 +529,11 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat size_t new_rows = 0; { String data_file_path = data_path_in_backup_fs / "data.bin"; + if (!backup->fileExists(data_file_path)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), data_file_path); + } auto backup_entry = backup->readFile(data_file_path); std::unique_ptr in = backup_entry->getReadBuffer(); std::optional temp_data_copy; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index f6d7e8e7afd..0afc7a0df7e 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,9 +1,10 @@ #include -#include +#include #include #include #include #include +#include #include #include #include @@ -22,17 +23,16 @@ #include #include #include -#include "Processors/QueryPlan/BuildQueryPipelineSettings.h" -#include "Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h" -#include -#include -#include -#include +#include +#include #include #include #include #include -#include +#include +#include +#include +#include namespace DB @@ -848,7 +848,7 @@ std::tuple StorageMerge::evaluateDatabaseName(cons throw Exception("REGEXP in Merge ENGINE takes only one argument", ErrorCodes::BAD_ARGUMENTS); auto * literal = func->arguments->children[0]->as(); - if (!literal || literal->value.safeGet().empty()) + if (!literal || literal->value.getType() != Field::Types::Which::String || literal->value.safeGet().empty()) throw Exception("Argument for REGEXP in Merge ENGINE should be a non empty String Literal", ErrorCodes::BAD_ARGUMENTS); return {true, 
func->arguments->children[0]}; @@ -879,10 +879,10 @@ void registerStorageMerge(StorageFactory & factory) if (!is_regexp) engine_args[0] = database_ast; - String source_database_name_or_regexp = database_ast->as().value.safeGet(); + String source_database_name_or_regexp = checkAndGetLiteralArgument(database_ast, "database_name"); engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.getLocalContext()); - String table_name_regexp = engine_args[1]->as().value.safeGet(); + String table_name_regexp = checkAndGetLiteralArgument(engine_args[1], "table_name_regexp"); return std::make_shared( args.table_id, args.columns, args.comment, source_database_name_or_regexp, is_regexp, table_name_regexp, args.getContext()); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 5fe7214194a..f7302863e85 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1540,11 +1540,14 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( loaded_parts[i]->storeVersionMetadata(); String old_name = renamed_parts.old_and_new_names[i].old_name; + /// It's important to create it outside of lock scope because + /// otherwise it can lock parts in destructor and deadlock is possible. + MergeTreeData::Transaction transaction(*this, local_context->getCurrentTransaction().get()); { auto lock = lockParts(); - MergeTreeData::Transaction transaction(*this, local_context->getCurrentTransaction().get()); + auto builder = loaded_parts[i]->data_part_storage->getBuilder(); fillNewPartName(loaded_parts[i], lock); - renameTempPartAndAdd(loaded_parts[i], transaction, lock); + renameTempPartAndAdd(loaded_parts[i], transaction, builder, lock); transaction.commit(&lock); } @@ -1625,7 +1628,9 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con for (auto part : dst_parts) { fillNewPartName(part, data_parts_lock); - renameTempPartAndReplaceUnlocked(part, transaction, data_parts_lock); + + auto builder = part->data_part_storage->getBuilder(); + renameTempPartAndReplaceUnlocked(part, transaction, builder, data_parts_lock); } /// Populate transaction transaction.commit(&data_parts_lock); @@ -1702,8 +1707,9 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const for (auto & part : dst_parts) { + auto builder = part->data_part_storage->getBuilder(); dest_table_storage->fillNewPartName(part, dest_data_parts_lock); - dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, dest_data_parts_lock); + dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, builder, dest_data_parts_lock); } @@ -1797,13 +1803,19 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) { + for (auto part : parts) { - auto lock = lockParts(); + /// It's important to create it outside of lock scope because + /// otherwise it can lock parts in destructor and deadlock is possible. 
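The comment above states a lock-ordering rule: an RAII transaction whose destructor may need the parts lock again (for rollback) has to be constructed before the scoped lock, so that its destructor can only run after that lock has been released. A minimal self-contained sketch of the pattern, using hypothetical Transaction/PartsRegistry types rather than the real MergeTreeData API:

#include <mutex>
#include <string>
#include <vector>

struct PartsRegistry
{
    std::mutex mutex;                    /// analogous to the parts lock
    std::vector<std::string> parts;
};

class Transaction
{
public:
    explicit Transaction(PartsRegistry & registry_) : registry(registry_) {}

    void add(const std::string & name) { pending.push_back(name); }
    void commit() { pending.clear(); }   /// called while the caller still holds the lock

    ~Transaction()
    {
        /// Rolling back uncommitted parts needs the registry lock again. If this object
        /// were declared inside the locked scope below, the destructor would run while
        /// the same non-recursive mutex is still held, i.e. a self-deadlock.
        if (!pending.empty())
        {
            std::lock_guard lock(registry.mutex);
            for (const auto & name : pending)
                std::erase(registry.parts, name);
        }
    }

private:
    PartsRegistry & registry;
    std::vector<std::string> pending;
};

void attachPart(PartsRegistry & registry, const std::string & part_name)
{
    Transaction transaction(registry);        /// created outside of the lock scope
    {
        std::lock_guard lock(registry.mutex); /// scoped "parts lock"
        registry.parts.push_back(part_name);
        transaction.add(part_name);
        transaction.commit();
    }
    /// If anything between locking and commit() throws, ~Transaction runs here,
    /// after the scoped lock has already been released.
}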
MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); - fillNewPartName(part, lock); - renameTempPartAndAdd(part, transaction, lock); - transaction.commit(&lock); + auto builder = part->data_part_storage->getBuilder(); + { + auto lock = lockParts(); + fillNewPartName(part, lock); + renameTempPartAndAdd(part, transaction, builder, lock); + transaction.commit(&lock); + } } } diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 11a1f8ba4d6..1f2523c8645 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -1,11 +1,12 @@ -#include "StorageMongoDB.h" -#include "StorageMongoDBSocketFactory.h" +#include +#include +#include +#include #include #include #include #include -#include #include #include #include @@ -120,7 +121,7 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C for (const auto & [arg_name, arg_value] : storage_specific_args) { if (arg_name == "options") - configuration.options = arg_value->as()->value.safeGet(); + configuration.options = checkAndGetLiteralArgument(arg_value, "options"); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key-value argument." @@ -139,17 +140,17 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); /// 27017 is the default MongoDB port. - auto parsed_host_port = parseAddress(engine_args[0]->as().value.safeGet(), 27017); + auto parsed_host_port = parseAddress(checkAndGetLiteralArgument(engine_args[0], "host:port"), 27017); configuration.host = parsed_host_port.first; configuration.port = parsed_host_port.second; - configuration.database = engine_args[1]->as().value.safeGet(); - configuration.table = engine_args[2]->as().value.safeGet(); - configuration.username = engine_args[3]->as().value.safeGet(); - configuration.password = engine_args[4]->as().value.safeGet(); + configuration.database = checkAndGetLiteralArgument(engine_args[1], "database"); + configuration.table = checkAndGetLiteralArgument(engine_args[2], "table"); + configuration.username = checkAndGetLiteralArgument(engine_args[3], "username"); + configuration.password = checkAndGetLiteralArgument(engine_args[4], "password"); if (engine_args.size() >= 6) - configuration.options = engine_args[5]->as().value.safeGet(); + configuration.options = checkAndGetLiteralArgument(engine_args[5], "database"); } diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 3ed97712292..7fe008eead4 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -5,14 +5,12 @@ #include #include #include +#include #include #include -#include -#include #include #include #include -#include #include #include #include @@ -253,9 +251,9 @@ StorageMySQLConfiguration StorageMySQL::getConfiguration(ASTs engine_args, Conte for (const auto & [arg_name, arg_value] : storage_specific_args) { if (arg_name == "replace_query") - configuration.replace_query = arg_value->as()->value.safeGet(); + configuration.replace_query = checkAndGetLiteralArgument(arg_value, "replace_query"); else if (arg_name == "on_duplicate_clause") - configuration.on_duplicate_clause = arg_value->as()->value.safeGet(); + configuration.on_duplicate_clause = checkAndGetLiteralArgument(arg_value, "on_duplicate_clause"); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key-value argument." 
@@ -273,18 +271,18 @@ StorageMySQLConfiguration StorageMySQL::getConfiguration(ASTs engine_args, Conte for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context_); - const auto & host_port = engine_args[0]->as().value.safeGet(); + const auto & host_port = checkAndGetLiteralArgument(engine_args[0], "host:port"); size_t max_addresses = context_->getSettingsRef().glob_expansion_max_elements; configuration.addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 3306); - configuration.database = engine_args[1]->as().value.safeGet(); - configuration.table = engine_args[2]->as().value.safeGet(); - configuration.username = engine_args[3]->as().value.safeGet(); - configuration.password = engine_args[4]->as().value.safeGet(); + configuration.database = checkAndGetLiteralArgument(engine_args[1], "database"); + configuration.table = checkAndGetLiteralArgument(engine_args[2], "table"); + configuration.username = checkAndGetLiteralArgument(engine_args[3], "username"); + configuration.password = checkAndGetLiteralArgument(engine_args[4], "password"); if (engine_args.size() >= 6) - configuration.replace_query = engine_args[5]->as().value.safeGet(); + configuration.replace_query = checkAndGetLiteralArgument(engine_args[5], "replace_query"); if (engine_args.size() == 7) - configuration.on_duplicate_clause = engine_args[6]->as().value.safeGet(); + configuration.on_duplicate_clause = checkAndGetLiteralArgument(engine_args[6], "on_duplicate_clause"); } for (const auto & address : configuration.addresses) context_->getRemoteHostFilter().checkHostAndPort(address.first, toString(address.second)); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 5b57384c1dd..e0c6dbf5463 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -11,8 +11,6 @@ #include #include -#include -#include #include #include @@ -31,7 +29,6 @@ #include #include -#include #include #include @@ -40,6 +37,7 @@ #include #include +#include namespace DB @@ -400,7 +398,7 @@ StoragePostgreSQLConfiguration StoragePostgreSQL::getConfiguration(ASTs engine_a for (const auto & [arg_name, arg_value] : storage_specific_args) { if (arg_name == "on_conflict") - configuration.on_conflict = arg_value->as()->value.safeGet(); + configuration.on_conflict = checkAndGetLiteralArgument(arg_value, "on_conflict"); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key-value argument." 
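Most of the registration-code churn in this diff swaps raw as-ASTLiteral / safeGet chains for checkAndGetLiteralArgument<T>(arg, "name"), so that a wrong engine argument surfaces as a named, user-facing error instead of a bad cast. A rough standalone sketch of what such a helper does, with hypothetical stand-in types rather than the real ClickHouse Field/ASTLiteral implementation:

#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <variant>

using Field = std::variant<std::monostate, std::string, unsigned long long>;

struct ASTLiteral { Field value; };

template <typename T>
T checkAndGetLiteralArgument(const std::optional<ASTLiteral> & arg, const std::string & arg_name)
{
    /// Two checks the old safeGet() chain skipped: the argument must be a literal,
    /// and the literal must hold the expected type; otherwise say *which* argument is wrong.
    if (!arg)
        throw std::invalid_argument("Argument '" + arg_name + "' must be a literal");
    if (const T * value = std::get_if<T>(&arg->value))
        return *value;
    throw std::invalid_argument("Argument '" + arg_name + "' has an unexpected type");
}

int main()
{
    std::optional<ASTLiteral> format_arg = ASTLiteral{std::string("CSV")};
    std::cout << checkAndGetLiteralArgument<std::string>(format_arg, "format_name") << '\n';

    try
    {
        std::optional<ASTLiteral> bad_arg = ASTLiteral{123ULL};
        checkAndGetLiteralArgument<std::string>(bad_arg, "compression_method");
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n'; /// Argument 'compression_method' has an unexpected type
    }
    return 0;
}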
@@ -418,7 +416,7 @@ StoragePostgreSQLConfiguration StoragePostgreSQL::getConfiguration(ASTs engine_a for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); - const auto & host_port = engine_args[0]->as().value.safeGet(); + const auto & host_port = checkAndGetLiteralArgument(engine_args[0], "host:port"); size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements; configuration.addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 5432); @@ -427,15 +425,15 @@ StoragePostgreSQLConfiguration StoragePostgreSQL::getConfiguration(ASTs engine_a configuration.host = configuration.addresses[0].first; configuration.port = configuration.addresses[0].second; } - configuration.database = engine_args[1]->as().value.safeGet(); - configuration.table = engine_args[2]->as().value.safeGet(); - configuration.username = engine_args[3]->as().value.safeGet(); - configuration.password = engine_args[4]->as().value.safeGet(); + configuration.database = checkAndGetLiteralArgument(engine_args[1], "host:port"); + configuration.table = checkAndGetLiteralArgument(engine_args[2], "table"); + configuration.username = checkAndGetLiteralArgument(engine_args[3], "username"); + configuration.password = checkAndGetLiteralArgument(engine_args[4], "password"); if (engine_args.size() >= 6) - configuration.schema = engine_args[5]->as().value.safeGet(); + configuration.schema = checkAndGetLiteralArgument(engine_args[5], "schema"); if (engine_args.size() >= 7) - configuration.on_conflict = engine_args[6]->as().value.safeGet(); + configuration.on_conflict = checkAndGetLiteralArgument(engine_args[6], "on_conflict"); } for (const auto & address : configuration.addresses) context->getRemoteHostFilter().checkHostAndPort(address.first, toString(address.second)); @@ -449,9 +447,12 @@ void registerStoragePostgreSQL(StorageFactory & factory) factory.registerStorage("PostgreSQL", [](const StorageFactory::Arguments & args) { auto configuration = StoragePostgreSQL::getConfiguration(args.engine_args, args.getLocalContext()); + const auto & settings = args.getContext()->getSettingsRef(); auto pool = std::make_shared(configuration, - args.getContext()->getSettingsRef().postgresql_connection_pool_size, - args.getContext()->getSettingsRef().postgresql_connection_pool_wait_timeout); + settings.postgresql_connection_pool_size, + settings.postgresql_connection_pool_wait_timeout, + POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, + settings.postgresql_connection_pool_auto_close_connection); return std::make_shared( args.table_id, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e44013f39ca..ae9f7640f66 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1100,123 +1101,8 @@ void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_pr void StorageReplicatedMergeTree::setTableStructure( ColumnsDescription new_columns, const ReplicatedMergeTreeTableMetadata::Diff & metadata_diff) { - StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); - - new_metadata.columns = new_columns; - - if (!metadata_diff.empty()) - { - auto parse_key_expr = [] (const String & key_expr) - { - ParserNotEmptyExpressionList parser(false); - auto new_sorting_key_expr_list = parseQuery(parser, 
key_expr, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - - ASTPtr order_by_ast; - if (new_sorting_key_expr_list->children.size() == 1) - order_by_ast = new_sorting_key_expr_list->children[0]; - else - { - auto tuple = makeASTFunction("tuple"); - tuple->arguments->children = new_sorting_key_expr_list->children; - order_by_ast = tuple; - } - return order_by_ast; - }; - - if (metadata_diff.sorting_key_changed) - { - auto order_by_ast = parse_key_expr(metadata_diff.new_sorting_key); - auto & sorting_key = new_metadata.sorting_key; - auto & primary_key = new_metadata.primary_key; - - sorting_key.recalculateWithNewAST(order_by_ast, new_metadata.columns, getContext()); - - if (primary_key.definition_ast == nullptr) - { - /// Primary and sorting key become independent after this ALTER so we have to - /// save the old ORDER BY expression as the new primary key. - auto old_sorting_key_ast = old_metadata.getSortingKey().definition_ast; - primary_key = KeyDescription::getKeyFromAST( - old_sorting_key_ast, new_metadata.columns, getContext()); - } - } - - if (metadata_diff.sampling_expression_changed) - { - if (!metadata_diff.new_sampling_expression.empty()) - { - auto sample_by_ast = parse_key_expr(metadata_diff.new_sampling_expression); - new_metadata.sampling_key.recalculateWithNewAST(sample_by_ast, new_metadata.columns, getContext()); - } - else /// SAMPLE BY was removed - { - new_metadata.sampling_key = {}; - } - } - - if (metadata_diff.skip_indices_changed) - new_metadata.secondary_indices = IndicesDescription::parse(metadata_diff.new_skip_indices, new_columns, getContext()); - - if (metadata_diff.constraints_changed) - new_metadata.constraints = ConstraintsDescription::parse(metadata_diff.new_constraints); - - if (metadata_diff.projections_changed) - new_metadata.projections = ProjectionsDescription::parse(metadata_diff.new_projections, new_columns, getContext()); - - if (metadata_diff.ttl_table_changed) - { - if (!metadata_diff.new_ttl_table.empty()) - { - ParserTTLExpressionList parser; - auto ttl_for_table_ast = parseQuery(parser, metadata_diff.new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( - ttl_for_table_ast, new_metadata.columns, getContext(), new_metadata.primary_key); - } - else /// TTL was removed - { - new_metadata.table_ttl = TTLTableDescription{}; - } - } - } - - /// Changes in columns may affect following metadata fields - new_metadata.column_ttls_by_name.clear(); - for (const auto & [name, ast] : new_metadata.columns.getColumnTTLs()) - { - auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, new_metadata.columns, getContext(), new_metadata.primary_key); - new_metadata.column_ttls_by_name[name] = new_ttl_entry; - } - - if (new_metadata.partition_key.definition_ast != nullptr) - new_metadata.partition_key.recalculateWithNewColumns(new_metadata.columns, getContext()); - - if (!metadata_diff.sorting_key_changed) /// otherwise already updated - new_metadata.sorting_key.recalculateWithNewColumns(new_metadata.columns, getContext()); - - /// Primary key is special, it exists even if not defined - if (new_metadata.primary_key.definition_ast != nullptr) - { - new_metadata.primary_key.recalculateWithNewColumns(new_metadata.columns, getContext()); - } - else - { - new_metadata.primary_key = KeyDescription::getKeyFromAST(new_metadata.sorting_key.definition_ast, new_metadata.columns, getContext()); - new_metadata.primary_key.definition_ast = nullptr; - } - - if (!metadata_diff.sampling_expression_changed && 
new_metadata.sampling_key.definition_ast != nullptr) - new_metadata.sampling_key.recalculateWithNewColumns(new_metadata.columns, getContext()); - - if (!metadata_diff.skip_indices_changed) /// otherwise already updated - { - for (auto & index : new_metadata.secondary_indices) - index.recalculateWithNewColumns(new_metadata.columns, getContext()); - } - - if (!metadata_diff.ttl_table_changed && new_metadata.table_ttl.definition_ast != nullptr) - new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( - new_metadata.table_ttl.definition_ast, new_metadata.columns, getContext(), new_metadata.primary_key); + StorageInMemoryMetadata new_metadata = metadata_diff.getNewMetadata(new_columns, getContext(), old_metadata); /// Even if the primary/sorting/partition keys didn't change we must reinitialize it /// because primary/partition key column types might have changed. @@ -1668,7 +1554,8 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry) Transaction transaction(*this, NO_TRANSACTION_RAW); part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - renameTempPartAndReplace(part, transaction); + auto builder = part->data_part_storage->getBuilder(); + renameTempPartAndReplace(part, transaction, builder); checkPartChecksumsAndCommit(transaction, part); writePartLog(PartLogElement::Type::NEW_PART, {}, 0 /** log entry is fake so we don't measure the time */, @@ -2353,7 +2240,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) Coordination::Requests ops; for (PartDescriptionPtr & part_desc : final_parts) { - renameTempPartAndReplace(part_desc->res_part, transaction); + auto builder = part_desc->res_part->data_part_storage->getBuilder(); + renameTempPartAndReplace(part_desc->res_part, transaction, builder); getCommitPartOps(ops, part_desc->res_part); lockSharedData(*part_desc->res_part, false, part_desc->hardlinked_files); @@ -2450,7 +2338,10 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr part = get_part(); // The fetched part is valuable and should not be cleaned like a temp part. part->is_temp = false; - part->renameTo("detached/" + entry.new_part_name, true); + auto builder = part->data_part_storage->getBuilder(); + part->renameTo("detached/" + entry.new_part_name, true, builder); + builder->commit(); + LOG_INFO(log, "Cloned part {} to detached directory", part->name); } } @@ -4089,10 +3980,11 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora { part = get_part(); + auto builder = part->data_part_storage->getBuilder(); if (!to_detached) { Transaction transaction(*this, NO_TRANSACTION_RAW); - renameTempPartAndReplace(part, transaction); + renameTempPartAndReplace(part, transaction, builder); replaced_parts = checkPartChecksumsAndCommit(transaction, part, hardlinked_files); @@ -4134,7 +4026,8 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora { // The fetched part is valuable and should not be cleaned like a temp part. 
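Throughout this file the part renames now go through a storage "builder" that is obtained first and committed explicitly afterwards (renameTempPartAndReplace(part, transaction, builder); ... builder->commit();). As far as the diff shows, the intent is to separate describing the change from applying it. A toy version of that two-step shape, with hypothetical classes rather than the real IDataPartStorage/builder interface:

#include <filesystem>
#include <iostream>

namespace fs = std::filesystem;

struct DataPartStorageBuilder
{
    fs::path current_path;
    fs::path target_path;

    void rename(const fs::path & new_path) { target_path = new_path; } /// only records the intent

    void commit()
    {
        /// In a real implementation this is where the filesystem rename would happen;
        /// until commit() nothing on disk has changed, so an exception earlier leaves
        /// the fetched part exactly where it was.
        std::cout << "rename " << current_path << " -> " << target_path << '\n';
        current_path = target_path;
    }
};

struct DataPartStorage
{
    fs::path path;

    DataPartStorageBuilder getBuilder() const { return DataPartStorageBuilder{path, path}; }
    void onRename(const fs::path & new_path) { path = new_path; } /// update the in-memory view
};

int main()
{
    DataPartStorage part_storage{fs::path("store") / "tmp_fetch_all_0_0_0"};

    auto builder = part_storage.getBuilder();
    builder.rename(fs::path("store") / "detached" / "all_0_0_0");
    part_storage.onRename(builder.target_path);

    builder.commit(); /// the actual (here: simulated) rename is the last, explicit step
    return 0;
}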
part->is_temp = false; - part->renameTo(fs::path("detached") / part_name, true); + part->renameTo(fs::path("detached") / part_name, true, builder); + builder->commit(); } } catch (const Exception & e) @@ -4165,8 +4058,12 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora } -DataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart(const String & part_name, const StorageMetadataPtr & metadata_snapshot, - const String & source_replica_path, DiskPtr replaced_disk, String replaced_part_path) +DataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( + const String & part_name, + const StorageMetadataPtr & metadata_snapshot, + const String & source_replica_path, + DiskPtr replaced_disk, + String replaced_part_path) { auto zookeeper = getZooKeeper(); const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); @@ -4240,7 +4137,10 @@ DataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart(const String & pa throw Exception("Part " + part->name + " fetched on wrong disk " + part->data_part_storage->getDiskName(), ErrorCodes::LOGICAL_ERROR); auto replaced_path = fs::path(replaced_part_path); - part->data_part_storage->rename(replaced_path.parent_path(), replaced_path.filename(), nullptr, true, false); + auto builder = part->data_part_storage->getBuilder(); + builder->rename(replaced_path.parent_path(), replaced_path.filename(), nullptr, true, false); + part->data_part_storage->onRename(replaced_path.parent_path(), replaced_path.filename()); + builder->commit(); } catch (const Exception & e) { @@ -6614,7 +6514,10 @@ void StorageReplicatedMergeTree::replacePartitionFrom( { auto data_parts_lock = lockParts(); for (auto & part : dst_parts) - renameTempPartAndReplaceUnlocked(part, transaction, data_parts_lock); + { + auto builder = part->data_part_storage->getBuilder(); + renameTempPartAndReplaceUnlocked(part, transaction, builder, data_parts_lock); + } } for (size_t i = 0; i < dst_parts.size(); ++i) @@ -6848,7 +6751,10 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta auto dest_data_parts_lock = dest_table_storage->lockParts(); for (auto & part : dst_parts) - dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, dest_data_parts_lock); + { + auto builder = part->data_part_storage->getBuilder(); + dest_table_storage->renameTempPartAndReplaceUnlocked(part, transaction, builder, dest_data_parts_lock); + } for (size_t i = 0; i < dst_parts.size(); ++i) dest_table_storage->lockSharedData(*dst_parts[i], false, hardlinked_files_for_parts[i]); @@ -7511,6 +7417,24 @@ void StorageReplicatedMergeTree::createTableSharedID() } +std::optional StorageReplicatedMergeTree::tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context) +{ + auto zk_path = tryExtractZkPathFromCreateQuery(create_query, global_context); + if (!zk_path) + return {}; + + String zk_name = zkutil::extractZooKeeperName(*zk_path); + zk_path = zkutil::extractZooKeeperPath(*zk_path, false, nullptr); + zkutil::ZooKeeperPtr zookeeper = (zk_name == getDefaultZooKeeperName()) ? 
global_context->getZooKeeper() : global_context->getAuxiliaryZooKeeper(zk_name); + + String id; + if (!zookeeper->tryGet(fs::path(*zk_path) / "table_shared_id", id)) + return {}; + + return id; +} + + void StorageReplicatedMergeTree::lockSharedDataTemporary(const String & part_name, const String & part_id, const DiskPtr & disk) const { auto settings = getSettings(); @@ -8027,7 +7951,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP try { MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); - auto replaced_parts = renameTempPartAndReplace(new_data_part, transaction); + auto replaced_parts = renameTempPartAndReplace(new_data_part, transaction, data_part_storage_builder); if (!replaced_parts.empty()) { @@ -8261,44 +8185,21 @@ void StorageReplicatedMergeTree::createAndStoreFreezeMetadata(DiskPtr disk, Data } -ASTPtr StorageReplicatedMergeTree::getCreateQueryForBackup(const ContextPtr & local_context, DatabasePtr * database) const +void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_query) const { - ASTPtr query = MergeTreeData::getCreateQueryForBackup(local_context, database); + /// Adjust the create query using values from ZooKeeper. + auto zookeeper = getZooKeeper(); + auto columns_from_entry = ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_path) / "columns")); + auto metadata_from_entry = ReplicatedMergeTreeTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata")); - /// Before storing the metadata in a backup we have to find a zookeeper path in its definition and turn the table's UUID in there - /// back into "{uuid}", and also we probably can remove the zookeeper path and replica name if they're default. - /// So we're kind of reverting what we had done to the table's definition in registerStorageMergeTree.cpp before we created this table. 
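The block being removed below did that reverting mostly by string surgery: it looked for the table's UUID inside the first engine argument (the ZooKeeper path) and put the {uuid} macro back. A standalone illustration of just that substitution, with a made-up UUID and path:

#include <iostream>
#include <string>

int main()
{
    /// Hypothetical values; in the removed code they come from the CREATE query's AST.
    std::string zookeeper_path_arg = "/clickhouse/tables/1e2a3b4c-5d6e-7f80-9a1b-2c3d4e5f6a7b/s1";
    std::string table_uuid_str = "1e2a3b4c-5d6e-7f80-9a1b-2c3d4e5f6a7b";

    if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != std::string::npos)
        zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}");

    std::cout << zookeeper_path_arg << '\n'; /// /clickhouse/tables/{uuid}/s1
    return 0;
}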
- auto & create = query->as(); - if (create.storage && create.storage->engine && (create.uuid != UUIDHelpers::Nil)) - { - auto & engine = *(create.storage->engine); - if (auto * engine_args_ast = typeid_cast(engine.arguments.get())) - { - auto & engine_args = engine_args_ast->children; - if (engine_args.size() >= 2) - { - auto * zookeeper_path_ast = typeid_cast(engine_args[0].get()); - auto * replica_name_ast = typeid_cast(engine_args[1].get()); - if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) && - replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String)) - { - String & zookeeper_path_arg = zookeeper_path_ast->value.get(); - String & replica_name_arg = replica_name_ast->value.get(); - String table_uuid_str = toString(create.uuid); - if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != String::npos) - zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}"); - const auto & config = getContext()->getConfigRef(); - if ((zookeeper_path_arg == getDefaultZooKeeperPath(config)) && (replica_name_arg == getDefaultReplicaName(config)) - && ((engine_args.size() == 2) || !engine_args[2]->as())) - { - engine_args.erase(engine_args.begin(), engine_args.begin() + 2); - } - } - } - } - } + auto current_metadata = getInMemoryMetadataPtr(); + auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, current_metadata).checkAndFindDiff(metadata_from_entry, current_metadata->getColumns(), getContext()); + auto adjusted_metadata = metadata_diff.getNewMetadata(columns_from_entry, getContext(), *current_metadata); + applyMetadataChangesToCreateQuery(create_query, adjusted_metadata); - return query; + /// Check that tryGetTableSharedIDFromCreateQuery() works for this storage. + if (tryGetTableSharedIDFromCreateQuery(*create_query, getContext()) != getTableSharedID()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} has its shared ID to be different from one from the create query"); } void StorageReplicatedMergeTree::backupData( @@ -8310,8 +8211,8 @@ void StorageReplicatedMergeTree::backupData( auto backup_entries = backupParts(backup_entries_collector.getContext(), "", partitions); auto coordination = backup_entries_collector.getBackupCoordination(); - String full_zk_path = getZooKeeperName() + getZooKeeperPath(); - coordination->addReplicatedDataPath(full_zk_path, data_path_in_backup); + String shared_id = getTableSharedID(); + coordination->addReplicatedDataPath(shared_id, data_path_in_backup); std::unordered_map part_names_with_hashes_calculating; for (auto & [relative_path, backup_entry] : backup_entries) @@ -8349,23 +8250,23 @@ void StorageReplicatedMergeTree::backupData( } /// Send our list of part names to the coordination (to compare with other replicas). - coordination->addReplicatedPartNames(full_zk_path, getStorageID().getFullTableName(), getReplicaName(), part_names_with_hashes); + coordination->addReplicatedPartNames(shared_id, getStorageID().getFullTableName(), getReplicaName(), part_names_with_hashes); /// This task will be executed after all replicas have collected their parts and the coordination is ready to /// give us the final list of parts to add to the BackupEntriesCollector. 
- auto post_collecting_task = [full_zk_path, + auto post_collecting_task = [shared_id, replica_name = getReplicaName(), coordination, backup_entries = std::move(backup_entries), &backup_entries_collector]() { - Strings data_paths = coordination->getReplicatedDataPaths(full_zk_path); + Strings data_paths = coordination->getReplicatedDataPaths(shared_id); std::vector data_paths_fs; data_paths_fs.reserve(data_paths.size()); for (const auto & data_path : data_paths) data_paths_fs.push_back(data_path); - Strings part_names = coordination->getReplicatedPartNames(full_zk_path, replica_name); + Strings part_names = coordination->getReplicatedPartNames(shared_id, replica_name); std::unordered_set part_names_set{part_names.begin(), part_names.end()}; for (const auto & [relative_path, backup_entry] : backup_entries) @@ -8378,7 +8279,7 @@ void StorageReplicatedMergeTree::backupData( backup_entries_collector.addBackupEntry(data_path / relative_path, backup_entry); } }; - backup_entries_collector.addPostCollectingTask(post_collecting_task); + backup_entries_collector.addPostTask(post_collecting_task); } void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 73a08a2b921..18b9ef54777 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -232,8 +232,8 @@ public: int getMetadataVersion() const { return metadata_version; } - /// Returns a slightly changed version of the CREATE TABLE query which must be written to a backup. - ASTPtr getCreateQueryForBackup(const ContextPtr & context, DatabasePtr * database) const override; + /// Modify a CREATE TABLE query to make a variant which must be written to a backup. + void adjustCreateQueryForBackup(ASTPtr & create_query) const override; /// Makes backup entries to backup the data of the storage. void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) override; @@ -312,6 +312,9 @@ public: // Return table id, common for different replicas String getTableSharedID() const override; + /// Returns the same as getTableSharedID(), but extracts it from a create query. + static std::optional tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); + static String getDefaultZooKeeperName() { return default_zookeeper_name; } /// Check if there are new broken disks and enqueue part recovery tasks. 
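The backupData() changes above key the replica coordination on the table's shared id instead of the full ZooKeeper path: each replica registers its part names, and the post-collecting task later keeps only the backup entries the coordination hands back to that replica. A toy model of that exchange; the class below is a hypothetical stand-in, the real coordination also carries checksums and data paths, and its selection rule is not a plain first-wins:

#include <iostream>
#include <map>
#include <string>
#include <vector>

class BackupCoordination
{
public:
    void addReplicatedPartNames(const std::string & shared_id, const std::string & replica,
                                const std::vector<std::string> & part_names)
    {
        auto & owners = part_owner[shared_id];
        for (const auto & part : part_names)
            owners.emplace(part, replica); /// keeps the first replica that offered this part
    }

    std::vector<std::string> getReplicatedPartNames(const std::string & shared_id, const std::string & replica) const
    {
        std::vector<std::string> result;
        for (const auto & [part, owner] : part_owner.at(shared_id))
            if (owner == replica)
                result.push_back(part);
        return result;
    }

private:
    /// shared table id -> (part name -> replica that will back it up)
    std::map<std::string, std::map<std::string, std::string>> part_owner;
};

int main()
{
    BackupCoordination coordination;
    coordination.addReplicatedPartNames("shared-id-1", "replica_1", {"all_0_0_0", "all_1_1_0"});
    coordination.addReplicatedPartNames("shared-id-1", "replica_2", {"all_0_0_0", "all_2_2_0"});

    /// replica_2 writes only the parts no other replica is already backing up.
    for (const auto & part : coordination.getReplicatedPartNames("shared-id-1", "replica_2"))
        std::cout << part << '\n'; /// all_2_2_0
    return 0;
}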
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index a7d9641d5c4..bed21a9affc 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -11,14 +11,12 @@ #include -#include #include #include #include #include #include -#include #include #include @@ -27,6 +25,7 @@ #include #include #include +#include #include #include @@ -1051,25 +1050,25 @@ void StorageS3::processNamedCollectionResult(StorageS3Configuration & configurat for (const auto & [arg_name, arg_value] : key_value_args) { if (arg_name == "access_key_id") - configuration.auth_settings.access_key_id = arg_value->as()->value.safeGet(); + configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(arg_value, "access_key_id"); else if (arg_name == "secret_access_key") - configuration.auth_settings.secret_access_key = arg_value->as()->value.safeGet(); + configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(arg_value, "secret_access_key"); else if (arg_name == "filename") - configuration.url = std::filesystem::path(configuration.url) / arg_value->as()->value.safeGet(); + configuration.url = std::filesystem::path(configuration.url) / checkAndGetLiteralArgument(arg_value, "filename"); else if (arg_name == "use_environment_credentials") - configuration.auth_settings.use_environment_credentials = arg_value->as()->value.safeGet(); + configuration.auth_settings.use_environment_credentials = checkAndGetLiteralArgument(arg_value, "use_environment_credentials"); else if (arg_name == "max_single_read_retries") - configuration.rw_settings.max_single_read_retries = arg_value->as()->value.safeGet(); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_read_retries"); else if (arg_name == "min_upload_part_size") - configuration.rw_settings.max_single_read_retries = arg_value->as()->value.safeGet(); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "min_upload_part_size"); else if (arg_name == "upload_part_size_multiply_factor") - configuration.rw_settings.max_single_read_retries = arg_value->as()->value.safeGet(); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); else if (arg_name == "upload_part_size_multiply_parts_count_threshold") - configuration.rw_settings.max_single_read_retries = arg_value->as()->value.safeGet(); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); else if (arg_name == "max_single_part_upload_size") - configuration.rw_settings.max_single_read_retries = arg_value->as()->value.safeGet(); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); else if (arg_name == "max_connections") - configuration.rw_settings.max_single_read_retries = arg_value->as()->value.safeGet(); + configuration.rw_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_connections"); else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].", @@ -1098,22 +1097,22 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - configuration.url = 
engine_args[0]->as().value.safeGet(); + configuration.url = checkAndGetLiteralArgument(engine_args[0], "url"); if (engine_args.size() >= 4) { - configuration.auth_settings.access_key_id = engine_args[1]->as().value.safeGet(); - configuration.auth_settings.secret_access_key = engine_args[2]->as().value.safeGet(); + configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(engine_args[1], "access_key_id"); + configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(engine_args[2], "secret_access_key"); } if (engine_args.size() == 3 || engine_args.size() == 5) { - configuration.compression_method = engine_args.back()->as().value.safeGet(); - configuration.format = engine_args[engine_args.size() - 2]->as().value.safeGet(); + configuration.compression_method = checkAndGetLiteralArgument(engine_args.back(), "compression_method"); + configuration.format = checkAndGetLiteralArgument(engine_args[engine_args.size() - 2], "format"); } else if (engine_args.size() != 1) { configuration.compression_method = "auto"; - configuration.format = engine_args.back()->as().value.safeGet(); + configuration.format = checkAndGetLiteralArgument(engine_args.back(), "format"); } } diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 1eb473af80d..a86ed7646b3 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -1,12 +1,10 @@ #include "StorageSQLite.h" #if USE_SQLITE -#include #include #include #include #include -#include #include #include #include @@ -16,6 +14,7 @@ #include #include #include +#include #include #include @@ -168,8 +167,8 @@ void registerStorageSQLite(StorageFactory & factory) for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); - const auto database_path = engine_args[0]->as().value.safeGet(); - const auto table_name = engine_args[1]->as().value.safeGet(); + const auto database_path = checkAndGetLiteralArgument(engine_args[0], "database_path"); + const auto table_name = checkAndGetLiteralArgument(engine_args[1], "table_name"); auto sqlite_db = openSQLiteDB(database_path, args.getContext(), /* throw_on_error */!args.attach); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index ad63499acfa..2f586a3c26c 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -11,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index d935d73d03d..b47623db50b 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include namespace DB diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index d569a81c4a7..e25db92be64 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -1,8 +1,6 @@ #include #include -#include -#include #include #include @@ -12,7 +10,6 @@ #include #include #include -#include #include #include @@ -21,11 +18,8 @@ #include -#include - #include -#include #include #include #include @@ -55,6 +49,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_FILE_NAME; extern const int TIMEOUT_EXCEEDED; + extern const int CANNOT_RESTORE_TABLE; } @@ -527,11 +522,8 @@ std::optional StorageStripeLog::totalBytes(const Settings &) const } -void 
StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) +void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - auto lock_timeout = getLockTimeout(backup_entries_collector.getContext()); loadIndices(lock_timeout); @@ -589,13 +581,13 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec data_path_in_backup_fs / "count.txt", std::make_unique(toString(num_rows))); } -void StorageStripeLog::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) +void StorageStripeLog::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto backup = restorer.getBackup(); - if (!restorer.isNonEmptyTableAllowed() && total_bytes && backup->hasFiles(data_path_in_backup)) + if (!backup->hasFiles(data_path_in_backup)) + return; + + if (!restorer.isNonEmptyTableAllowed() && total_bytes) RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); auto lock_timeout = getLockTimeout(restorer.getContext()); @@ -624,6 +616,11 @@ void StorageStripeLog::restoreDataImpl(const BackupPtr & backup, const String & auto old_data_size = file_checker.getFileSize(data_file_path); { String file_path_in_backup = data_path_in_backup_fs / fileName(data_file_path); + if (!backup->fileExists(file_path_in_backup)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), file_path_in_backup); + } auto backup_entry = backup->readFile(file_path_in_backup); auto in = backup_entry->getReadBuffer(); auto out = disk->writeFile(data_file_path, max_compress_block_size, WriteMode::Append); @@ -634,6 +631,11 @@ void StorageStripeLog::restoreDataImpl(const BackupPtr & backup, const String & { String index_path_in_backup = data_path_in_backup_fs / fileName(index_file_path); IndexForNativeFormat extra_indices; + if (!backup->fileExists(index_path_in_backup)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), index_path_in_backup); + } auto backup_entry = backup->readFile(index_path_in_backup); auto index_in = backup_entry->getReadBuffer(); CompressedReadBuffer index_compressed_in{*index_in}; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index cbec49865a1..15ae23305f3 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1,6 +1,8 @@ #include +#include +#include +#include -#include #include #include #include @@ -21,17 +23,15 @@ #include #include #include - -#include -#include -#include "Common/ThreadStatus.h" -#include -#include "IO/HTTPCommon.h" -#include "IO/ReadWriteBufferFromHTTP.h" - -#include #include #include + +#include +#include +#include +#include + +#include #include #include #include @@ -960,11 +960,11 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex if (header_it != args.end()) args.erase(header_it); - configuration.url = args[0]->as().value.safeGet(); + configuration.url = 
checkAndGetLiteralArgument(args[0], "url"); if (args.size() > 1) - configuration.format = args[1]->as().value.safeGet(); + configuration.format = checkAndGetLiteralArgument(args[1], "format"); if (args.size() == 3) - configuration.compression_method = args[2]->as().value.safeGet(); + configuration.compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); } if (configuration.format == "auto") diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index f44daf2557e..0b7a1ae75d4 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -1,16 +1,16 @@ -#include "StorageXDBC.h" +#include +#include +#include +#include +#include #include -#include #include #include #include #include #include #include -#include -#include -#include #include #include @@ -173,11 +173,11 @@ namespace BridgeHelperPtr bridge_helper = std::make_shared>(args.getContext(), args.getContext()->getSettingsRef().http_receive_timeout.value, - engine_args[0]->as().value.safeGet()); + checkAndGetLiteralArgument(engine_args[0], "connection_string")); return std::make_shared( args.table_id, - engine_args[1]->as().value.safeGet(), - engine_args[2]->as().value.safeGet(), + checkAndGetLiteralArgument(engine_args[1], "database_name"), + checkAndGetLiteralArgument(engine_args[2], "table_name"), args.columns, args.comment, args.getContext(), diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index b3f1231bd1a..e2bc699d3f1 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -9,6 +9,16 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + namespace DB { @@ -20,6 +30,11 @@ enum class FunctionOrigin : Int8 EXECUTABLE_USER_DEFINED = 2 }; +namespace ErrorCodes +{ + extern const int CANNOT_RESTORE_TABLE; +} + namespace { template @@ -99,4 +114,66 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c fillRow(res_columns, function_name, UInt64(0), "", FunctionOrigin::EXECUTABLE_USER_DEFINED, user_defined_executable_functions_factory); } } + +void StorageSystemFunctions::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) +{ + const auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); + const auto & user_defined_sql_functions_names = user_defined_sql_functions_factory.getAllRegisteredNames(); + fs::path data_path_in_backup_fs{data_path_in_backup}; + for (const auto & function_name : user_defined_sql_functions_names) + { + auto ast = user_defined_sql_functions_factory.tryGet(function_name); + if (!ast) + continue; + backup_entries_collector.addBackupEntry( + data_path_in_backup_fs / (escapeForFileName(function_name) + ".sql"), + std::make_shared(queryToString(ast))); + } +} + +void StorageSystemFunctions::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) +{ + auto backup = restorer.getBackup(); + fs::path data_path_in_backup_fs{data_path_in_backup}; + + Strings filenames = backup->listFiles(data_path_in_backup); + for (const auto & filename : filenames) + { + if (!filename.ends_with(".sql")) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't have the extension .sql", + getStorageID().getFullTableName(), 
String{data_path_in_backup_fs / filename}); + } + } + + auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); + const auto & restore_settings = restorer.getRestoreSettings(); + auto context = restorer.getContext(); + + for (const auto & filename : filenames) + { + String escaped_function_name = filename.substr(0, filename.length() - strlen(".sql")); + String function_name = unescapeForFileName(escaped_function_name); + + String filepath = data_path_in_backup_fs / filename; + auto function_def_entry = backup->readFile(filepath); + auto function_def_in = function_def_entry->getReadBuffer(); + String function_def; + readStringUntilEOF(function_def, *function_def_in); + + ParserCreateFunctionQuery parser; + ASTPtr ast = parseQuery( + parser, + function_def.data(), + function_def.data() + function_def.size(), + "in file " + filepath + " from backup " + backup->getName(), + 0, + context->getSettingsRef().max_parser_depth); + + bool replace = (restore_settings.create_function == RestoreUDFCreationMode::kReplace); + bool if_not_exists = (restore_settings.create_function == RestoreUDFCreationMode::kCreateIfNotExists); + user_defined_sql_functions_factory.registerFunction(context, function_name, ast, replace, if_not_exists, true); + } +} + } diff --git a/src/Storages/System/StorageSystemFunctions.h b/src/Storages/System/StorageSystemFunctions.h index fdbe79e29a2..606694a4c0b 100644 --- a/src/Storages/System/StorageSystemFunctions.h +++ b/src/Storages/System/StorageSystemFunctions.h @@ -19,6 +19,9 @@ public: static NamesAndTypesList getNamesAndTypes(); + void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) override; + void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index efe6b93fe57..046db151684 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -122,23 +122,17 @@ void StorageSystemQuotas::fillData(MutableColumns & res_columns, ContextPtr cont } void StorageSystemQuotas::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::QUOTA, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::QUOTA); } void StorageSystemQuotas::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } diff --git 
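// A minimal standalone sketch of the naming scheme used by the UDF backup code above:
// each SQL user-defined function is written to <data_path_in_backup>/<escaped name>.sql,
// and on restore the name is recovered by rejecting non-*.sql entries and stripping the
// extension. The percent-escaping below is a simplified stand-in for ClickHouse's
// escapeForFileName/unescapeForFileName, not the exact implementation.
#include <cctype>
#include <cstdio>
#include <stdexcept>
#include <string>

std::string escapeForFileName(const std::string & name)
{
    std::string out;
    for (unsigned char c : name)
    {
        if (std::isalnum(c) || c == '_')
            out += static_cast<char>(c);
        else
        {
            char buf[4];
            std::snprintf(buf, sizeof(buf), "%%%02X", c);  // e.g. ' ' -> "%20"
            out += buf;
        }
    }
    return out;
}

std::string backupEntryName(const std::string & function_name)
{
    return escapeForFileName(function_name) + ".sql";
}

std::string escapedNameFromBackupEntry(const std::string & filename)
{
    const std::string suffix = ".sql";
    if (filename.size() <= suffix.size()
        || filename.compare(filename.size() - suffix.size(), suffix.size(), suffix) != 0)
        throw std::runtime_error("File name " + filename + " doesn't have the extension .sql");
    // The caller would still unescape this to recover the original function name.
    return filename.substr(0, filename.size() - suffix.size());
}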
a/src/Storages/System/StorageSystemRoles.cpp b/src/Storages/System/StorageSystemRoles.cpp index ff3490ce8ba..e5b8d53ce7e 100644 --- a/src/Storages/System/StorageSystemRoles.cpp +++ b/src/Storages/System/StorageSystemRoles.cpp @@ -60,23 +60,17 @@ void StorageSystemRoles::fillData(MutableColumns & res_columns, ContextPtr conte } void StorageSystemRoles::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::ROLE, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::ROLE); } void StorageSystemRoles::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index 680f90adff7..064f610730d 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -140,23 +140,17 @@ void StorageSystemRowPolicies::fillData(MutableColumns & res_columns, ContextPtr } void StorageSystemRowPolicies::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::ROW_POLICY, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::ROW_POLICY); } void StorageSystemRowPolicies::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } diff --git a/src/Storages/System/StorageSystemSettingsProfiles.cpp b/src/Storages/System/StorageSystemSettingsProfiles.cpp index 7c3ccfe863a..d03848ba68b 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.cpp +++ b/src/Storages/System/StorageSystemSettingsProfiles.cpp @@ -87,23 +87,17 @@ void StorageSystemSettingsProfiles::fillData(MutableColumns & res_columns, Conte } void 
StorageSystemSettingsProfiles::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::SETTINGS_PROFILE, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::SETTINGS_PROFILE); } void StorageSystemSettingsProfiles::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index f2cae638d45..be56abfa3e8 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -215,23 +215,17 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte } void StorageSystemUsers::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::USER, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::USER); } void StorageSystemUsers::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } diff --git a/src/Storages/RabbitMQ/UVLoop.h b/src/Storages/UVLoop.h similarity index 94% rename from src/Storages/RabbitMQ/UVLoop.h rename to src/Storages/UVLoop.h index 4de67cbc206..66668739dd7 100644 --- a/src/Storages/RabbitMQ/UVLoop.h +++ b/src/Storages/UVLoop.h @@ -2,8 +2,8 @@ #include -#include #include +#include #include @@ -19,7 +19,7 @@ namespace ErrorCodes class UVLoop : public boost::noncopyable { public: - UVLoop(): loop_ptr(new uv_loop_t()) + UVLoop() : loop_ptr(new uv_loop_t()) { int res = uv_loop_init(loop_ptr.get()); diff --git a/src/Storages/checkAndGetLiteralArgument.cpp b/src/Storages/checkAndGetLiteralArgument.cpp new file mode 100644 index 00000000000..3c43ce98920 --- /dev/null +++ 
b/src/Storages/checkAndGetLiteralArgument.cpp @@ -0,0 +1,40 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +template +T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name) +{ + return checkAndGetLiteralArgument(*arg->as(), arg_name); +} + +template +T checkAndGetLiteralArgument(const ASTLiteral & arg, const String & arg_name) +{ + auto requested_type = Field::TypeToEnum>>::value; + auto provided_type = arg.value.getType(); + if (requested_type != provided_type) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Argument '{}' must be a literal with type {}, got {}", + arg_name, + fieldTypeToString(requested_type), + fieldTypeToString(provided_type)); + + return arg.value.safeGet(); +} + +template String checkAndGetLiteralArgument(const ASTPtr &, const String &); +template UInt64 checkAndGetLiteralArgument(const ASTPtr &, const String &); +template UInt8 checkAndGetLiteralArgument(const ASTPtr &, const String &); +template bool checkAndGetLiteralArgument(const ASTPtr &, const String &); +template String checkAndGetLiteralArgument(const ASTLiteral &, const String &); + +} diff --git a/src/Storages/checkAndGetLiteralArgument.h b/src/Storages/checkAndGetLiteralArgument.h new file mode 100644 index 00000000000..086deca5121 --- /dev/null +++ b/src/Storages/checkAndGetLiteralArgument.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +namespace DB +{ + +template +T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name); + +template +T checkAndGetLiteralArgument(const ASTLiteral & arg, const String & arg_name); + +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 585e85688fc..575b3de7ae2 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -54,6 +54,7 @@ void registerStorageMySQL(StorageFactory & factory); void registerStorageMongoDB(StorageFactory & factory); + #if USE_RDKAFKA void registerStorageKafka(StorageFactory & factory); #endif @@ -62,6 +63,10 @@ void registerStorageKafka(StorageFactory & factory); void registerStorageRabbitMQ(StorageFactory & factory); #endif +#if USE_NATSIO +void registerStorageNATS(StorageFactory & factory); +#endif + #if USE_ROCKSDB void registerStorageEmbeddedRocksDB(StorageFactory & factory); #endif @@ -146,6 +151,10 @@ void registerStorages() registerStorageRabbitMQ(factory); #endif + #if USE_NATSIO + registerStorageNATS(factory); + #endif + #if USE_ROCKSDB registerStorageEmbeddedRocksDB(factory); #endif diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index 99dded030e5..12371df4e3c 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -2,11 +2,9 @@ #if USE_HIVE #include -#include #include #include #include -#include #include #include #include @@ -14,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -44,11 +43,11 @@ namespace DB for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context_); - hive_metastore_url = args[0]->as().value.safeGet(); - hive_database = args[1]->as().value.safeGet(); - hive_table = args[2]->as().value.safeGet(); - table_structure = args[3]->as().value.safeGet(); - partition_by_def = args[4]->as().value.safeGet(); + hive_metastore_url = checkAndGetLiteralArgument(args[0], "hive_url"); + hive_database = checkAndGetLiteralArgument(args[1], "hive_database"); + hive_table = 
checkAndGetLiteralArgument(args[2], "hive_table"); + table_structure = checkAndGetLiteralArgument(args[3], "structure"); + partition_by_def = checkAndGetLiteralArgument(args[4], "partition_by_keys"); actual_columns = parseColumnsListFromString(table_structure, context_); } diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 7fa3ccda195..e2391787726 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -8,11 +8,10 @@ #include #include +#include #include -#include - #include namespace DB @@ -25,10 +24,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -void ITableFunctionFileLike::parseFirstArguments(const ASTPtr & arg, ContextPtr context) +void ITableFunctionFileLike::parseFirstArguments(const ASTPtr & arg, const ContextPtr &) { - auto ast = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - filename = ast->as().value.safeGet(); + filename = checkAndGetLiteralArgument(arg, "source"); } String ITableFunctionFileLike::getFormatFromFirstArgument() @@ -49,13 +47,13 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context if (args.empty()) throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + parseFirstArguments(args[0], context); - for (size_t i = 1; i < args.size(); ++i) - args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); - if (args.size() > 1) - format = args[1]->as().value.safeGet(); + format = checkAndGetLiteralArgument(args[1], "format"); if (format == "auto") format = getFormatFromFirstArgument(); @@ -67,7 +65,7 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename, format (default auto), structure (default auto) and compression method (default auto)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - structure = args[2]->as().value.safeGet(); + structure = checkAndGetLiteralArgument(args[2], "structure"); if (structure.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -75,7 +73,7 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context ast_function->formatForErrorMessage()); if (args.size() == 4) - compression_method = args[3]->as().value.safeGet(); + compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); } StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 88ad75b1018..c2f32eb0aa3 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -20,7 +20,7 @@ public: protected: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - virtual void parseFirstArguments(const ASTPtr & arg, ContextPtr context); + virtual void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context); virtual String getFormatFromFirstArgument(); String filename; diff --git a/src/TableFunctions/TableFunctionDictionary.cpp b/src/TableFunctions/TableFunctionDictionary.cpp index c251b2703e1..54c23cfb64b 100644 --- a/src/TableFunctions/TableFunctionDictionary.cpp 
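// A condensed, standalone illustration of the checkAndGetLiteralArgument pattern
// introduced above: instead of casting an engine argument and calling safeGet blindly,
// the helper checks that the literal's runtime type matches the requested type and
// reports the argument name on mismatch. Field here is a simplified stand-in for
// ClickHouse's Field, and the error text is illustrative.
#include <cstdint>
#include <stdexcept>
#include <string>
#include <variant>

using Field = std::variant<std::string, std::uint64_t, bool>;

template <typename T>
T checkAndGetLiteralArgument(const Field & value, const std::string & arg_name)
{
    if (const T * typed = std::get_if<T>(&value))
        return *typed;
    throw std::invalid_argument("Argument '" + arg_name + "' must be a literal of the requested type");
}

int main()
{
    Field url = std::string("https://example.com/data.csv");
    std::string parsed = checkAndGetLiteralArgument<std::string>(url, "url");   // ok
    // checkAndGetLiteralArgument<std::uint64_t>(url, "url");                   // would throw
    return parsed.empty() ? 1 : 0;
}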
+++ b/src/TableFunctions/TableFunctionDictionary.cpp @@ -7,6 +7,7 @@ #include #include +#include #include @@ -35,7 +36,7 @@ void TableFunctionDictionary::parseArguments(const ASTPtr & ast_function, Contex for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - dictionary_name = args[0]->as().value.safeGet(); + dictionary_name = checkAndGetLiteralArgument(args[0], "dictionary_name"); } ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index dc88cca51e6..b84008f5ac8 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -4,8 +4,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -43,16 +43,16 @@ void TableFunctionExecutable::parseArguments(const ASTPtr & ast_function, Contex for (size_t i = 0; i <= 2; ++i) args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); - auto scipt_name_with_arguments_value = args[0]->as().value.safeGet(); + auto script_name_with_arguments_value = checkAndGetLiteralArgument(args[0], "script_name_with_arguments_value"); std::vector script_name_with_arguments; - boost::split(script_name_with_arguments, scipt_name_with_arguments_value, [](char c){ return c == ' '; }); + boost::split(script_name_with_arguments, script_name_with_arguments_value, [](char c){ return c == ' '; }); script_name = script_name_with_arguments[0]; script_name_with_arguments.erase(script_name_with_arguments.begin()); arguments = std::move(script_name_with_arguments); - format = args[1]->as().value.safeGet(); - structure = args[2]->as().value.safeGet(); + format = checkAndGetLiteralArgument(args[1], "format"); + structure = checkAndGetLiteralArgument(args[2], "structure"); for (size_t i = 3; i < args.size(); ++i) { diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 507b3406cb8..6f8f0db46a0 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, ContextPtr context) +void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr & context) { if (context->getApplicationType() != Context::ApplicationType::LOCAL) { @@ -29,36 +29,27 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, ContextPtr conte return; } - if (auto opt_name = tryGetIdentifierName(arg)) + const auto * literal = arg->as(); + auto type = literal->value.getType(); + if (type == Field::Types::String) { - if (*opt_name == "stdin") + filename = literal->value.safeGet(); + if (filename == "stdin" || filename == "-") fd = STDIN_FILENO; - else if (*opt_name == "stdout") + else if (filename == "stdout") fd = STDOUT_FILENO; - else if (*opt_name == "stderr") + else if (filename == "stderr") fd = STDERR_FILENO; - else - filename = *opt_name; } - else if (const auto * literal = arg->as()) + else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { - auto type = literal->value.getType(); - if (type == Field::Types::Int64 || type == Field::Types::UInt64) - { - fd = (type == Field::Types::Int64) ? 
static_cast(literal->value.get()) : static_cast(literal->value.get()); - if (fd < 0) - throw Exception("File descriptor must be non-negative", ErrorCodes::BAD_ARGUMENTS); - } - else if (type == Field::Types::String) - { - filename = literal->value.get(); - if (filename == "-") - fd = STDIN_FILENO; - } - else - throw Exception( - "The first argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); + fd = (type == Field::Types::Int64) ? literal->value.get() : literal->value.get(); + if (fd < 0) + throw Exception("File descriptor must be non-negative", ErrorCodes::BAD_ARGUMENTS); } + else + throw Exception( + "The first argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); } String TableFunctionFile::getFormatFromFirstArgument() diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index f956043e69a..20ecdb6222c 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -24,7 +24,7 @@ public: protected: int fd = -1; - void parseFirstArguments(const ASTPtr & arg, ContextPtr context) override; + void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context) override; String getFormatFromFirstArgument() override; private: diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index d3ce9627598..d47f8353e18 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -43,8 +44,8 @@ void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - format = args[0]->as().value.safeGet(); - data = args[1]->as().value.safeGet(); + format = checkAndGetLiteralArgument(args[0], "format"); + data = checkAndGetLiteralArgument(args[1], "data"); } ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index ad766c6c66e..083e4a54190 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -58,20 +59,20 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co } /// Parsing first argument as table structure and creating a sample block - structure = args[0]->as().value.safeGet(); + structure = checkAndGetLiteralArgument(args[0], "structure"); if (args.size() >= 2) { - const Field & value = args[1]->as().value; - if (!value.isNull()) - random_seed = value.safeGet(); + const auto & literal = args[1]->as(); + if (!literal.value.isNull()) + random_seed = checkAndGetLiteralArgument(literal, "random_seed"); } if (args.size() >= 3) - max_string_length = args[2]->as().value.safeGet(); + max_string_length = checkAndGetLiteralArgument(args[2], "max_string_length"); if (args.size() == 4) - max_array_length = args[3]->as().value.safeGet(); + max_array_length = checkAndGetLiteralArgument(args[3], "max_string_length"); } ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp index 80f19cd015a..b5e14a91b91 100644 --- 
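// A small standalone sketch of the first-argument handling shown above for the file()
// table function: the string literals "stdin"/"-", "stdout" and "stderr" map to the
// standard descriptors, a non-negative integer is treated as a raw descriptor, and any
// other string is kept as a path. Assumes a POSIX environment for STDIN_FILENO etc.
#include <cstdint>
#include <stdexcept>
#include <string>
#include <unistd.h>

struct FirstArgument
{
    std::string filename;   // empty when a descriptor is used instead of a path
    int fd = -1;
};

FirstArgument parseFirstArgument(const std::string & value)
{
    FirstArgument result;
    if (value == "stdin" || value == "-")
        result.fd = STDIN_FILENO;
    else if (value == "stdout")
        result.fd = STDOUT_FILENO;
    else if (value == "stderr")
        result.fd = STDERR_FILENO;
    else
        result.filename = value;
    return result;
}

FirstArgument parseFirstArgument(std::int64_t value)
{
    if (value < 0)
        throw std::invalid_argument("File descriptor must be non-negative");
    FirstArgument result;
    result.fd = static_cast<int>(value);
    return result;
}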
a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -5,8 +5,8 @@ #include #include -#include #include +#include #include #include #include @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include "registerTableFunctions.h" @@ -61,7 +59,7 @@ void TableFunctionHDFSCluster::parseArguments(const ASTPtr & ast_function, Conte arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); /// This argument is always the first - cluster_name = args[0]->as().value.safeGet(); + cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); if (!context->tryGetCluster(cluster_name)) throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); diff --git a/src/TableFunctions/TableFunctionInput.cpp b/src/TableFunctions/TableFunctionInput.cpp index 0ff56fefb68..0f26cab3683 100644 --- a/src/TableFunctions/TableFunctionInput.cpp +++ b/src/TableFunctions/TableFunctionInput.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -6,6 +5,7 @@ #include #include #include +#include #include #include #include "registerTableFunctions.h" @@ -40,7 +40,7 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr throw Exception("Table function '" + getName() + "' requires exactly 1 argument: structure", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - structure = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context)->as().value.safeGet(); + structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context), "structure"); } ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index 28aed2f03ed..b055e241459 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -58,10 +59,10 @@ void TableFunctionMerge::parseArguments(const ASTPtr & ast_function, ContextPtr if (!is_regexp) args[0] = database_ast; - source_database_name_or_regexp = database_ast->as().value.safeGet(); + source_database_name_or_regexp = checkAndGetLiteralArgument(database_ast, "database_name"); args[1] = evaluateConstantExpressionAsLiteral(args[1], context); - source_table_regexp = args[1]->as().value.safeGet(); + source_table_regexp = checkAndGetLiteralArgument(args[1], "table_name_regexp"); } diff --git a/src/TableFunctions/TableFunctionNull.cpp b/src/TableFunctions/TableFunctionNull.cpp index dea95b86ffd..f5d5a92ec1a 100644 --- a/src/TableFunctions/TableFunctionNull.cpp +++ b/src/TableFunctions/TableFunctionNull.cpp @@ -1,9 +1,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -30,7 +30,7 @@ void TableFunctionNull::parseArguments(const ASTPtr & ast_function, ContextPtr c ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (!arguments.empty()) - structure = evaluateConstantExpressionOrIdentifierAsLiteral(arguments[0], context)->as()->value.safeGet(); + structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(arguments[0], context), "structure"); } ColumnsDescription TableFunctionNull::getActualTableStructure(ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 7e7424be38f..d61140e1a07 100644 --- 
a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -62,9 +62,13 @@ void TableFunctionPostgreSQL::parseArguments(const ASTPtr & ast_function, Contex throw Exception("Table function 'PostgreSQL' must have arguments.", ErrorCodes::BAD_ARGUMENTS); configuration.emplace(StoragePostgreSQL::getConfiguration(func_args.arguments->children, context)); - connection_pool = std::make_shared(*configuration, - context->getSettingsRef().postgresql_connection_pool_size, - context->getSettingsRef().postgresql_connection_pool_wait_timeout); + const auto & settings = context->getSettingsRef(); + connection_pool = std::make_shared( + *configuration, + settings.postgresql_connection_pool_size, + settings.postgresql_connection_pool_wait_timeout, + POSTGRESQL_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, + settings.postgresql_connection_pool_auto_close_connection); } diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index f06831f191e..098756bcd7c 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -13,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -79,7 +80,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr else { auto database_literal = evaluateConstantExpressionOrIdentifierAsLiteral(arg_value, context); - configuration.database = database_literal->as()->value.safeGet(); + configuration.database = checkAndGetLiteralArgument(database_literal, "database"); } } else @@ -113,7 +114,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr if (is_cluster_function) { args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); - cluster_name = args[arg_num]->as().value.safeGet(); + cluster_name = checkAndGetLiteralArgument(args[arg_num], "cluster_name"); } else { @@ -134,7 +135,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr else { args[arg_num] = evaluateConstantExpressionForDatabaseName(args[arg_num], context); - configuration.database = args[arg_num]->as().value.safeGet(); + configuration.database = checkAndGetLiteralArgument(args[arg_num], "database"); ++arg_num; @@ -149,7 +150,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr { std::swap(qualified_name.database, qualified_name.table); args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); - qualified_name.table = args[arg_num]->as().value.safeGet(); + qualified_name.table = checkAndGetLiteralArgument(args[arg_num], "table"); ++arg_num; } } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index d081ec4319d..101d946a3f9 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "registerTableFunctions.h" @@ -56,7 +57,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. 
if (args.size() == 4) { - auto second_arg = args[1]->as().value.safeGet(); + auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id"); if (FormatFactory::instance().getAllFormats().contains(second_arg)) args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; @@ -68,7 +69,8 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. else if (args.size() == 3) { - auto second_arg = args[1]->as().value.safeGet(); + + auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id"); if (FormatFactory::instance().getAllFormats().contains(second_arg)) args_to_idx = {{"format", 1}, {"structure", 2}}; else @@ -80,22 +82,22 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar } /// This argument is always the first - s3_configuration.url = args[0]->as().value.safeGet(); + s3_configuration.url = checkAndGetLiteralArgument(args[0], "url"); if (args_to_idx.contains("format")) - s3_configuration.format = args[args_to_idx["format"]]->as().value.safeGet(); + s3_configuration.format = checkAndGetLiteralArgument(args[args_to_idx["format"]], "format"); if (args_to_idx.contains("structure")) - s3_configuration.structure = args[args_to_idx["structure"]]->as().value.safeGet(); + s3_configuration.structure = checkAndGetLiteralArgument(args[args_to_idx["structure"]], "structure"); if (args_to_idx.contains("compression_method")) - s3_configuration.compression_method = args[args_to_idx["compression_method"]]->as().value.safeGet(); + s3_configuration.compression_method = checkAndGetLiteralArgument(args[args_to_idx["compression_method"]], "compression_method"); if (args_to_idx.contains("access_key_id")) - s3_configuration.auth_settings.access_key_id = args[args_to_idx["access_key_id"]]->as().value.safeGet(); + s3_configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(args[args_to_idx["access_key_id"]], "access_key_id"); if (args_to_idx.contains("secret_access_key")) - s3_configuration.auth_settings.secret_access_key = args[args_to_idx["secret_access_key"]]->as().value.safeGet(); + s3_configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); } if (s3_configuration.format == "auto") diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 2f558c58352..fab74c07e11 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -3,11 +3,11 @@ #if USE_AWS_S3 #include +#include +#include #include -#include #include -#include #include #include #include @@ -17,7 +17,6 @@ #include #include #include -#include #include #include "registerTableFunctions.h" @@ -65,7 +64,7 @@ void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, Context throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// This arguments are always the first - configuration.cluster_name = args[0]->as().value.safeGet(); + configuration.cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); if (!context->tryGetCluster(configuration.cluster_name)) throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", configuration.cluster_name); diff --git a/src/TableFunctions/TableFunctionSQLite.cpp b/src/TableFunctions/TableFunctionSQLite.cpp index fb2dc90a1f7..64ff93494db 100644 --- 
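// A standalone sketch of the disambiguation above: with three or four positional
// arguments, s3(...) decides whether the second argument is a format name or an access
// key by checking it against the set of known formats. The hard-coded format list is a
// tiny stand-in for FormatFactory::instance().getAllFormats(), and the credential
// branches assume the usual s3(url, access_key_id, secret_access_key, format) layout,
// since those lines fall outside the quoted hunks.
#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <vector>

std::map<std::string, std::size_t> mapS3Arguments(const std::vector<std::string> & args)
{
    static const std::set<std::string> known_formats{"CSV", "TSV", "Parquet", "Native"};

    // args[0] is always the URL; only the remaining positions are ambiguous.
    std::map<std::string, std::size_t> args_to_idx;
    if (args.size() == 4)
    {
        if (known_formats.count(args[1]))
            args_to_idx.insert({{"format", 1}, {"structure", 2}, {"compression_method", 3}});
        else
            args_to_idx.insert({{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}});
    }
    else if (args.size() == 3)
    {
        if (known_formats.count(args[1]))
            args_to_idx.insert({{"format", 1}, {"structure", 2}});
        else
            args_to_idx.insert({{"access_key_id", 1}, {"secret_access_key", 2}});
    }
    return args_to_idx;
}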
a/src/TableFunctions/TableFunctionSQLite.cpp +++ b/src/TableFunctions/TableFunctionSQLite.cpp @@ -10,14 +10,14 @@ #include "registerTableFunctions.h" #include -#include #include -#include #include #include +#include + namespace DB { @@ -73,8 +73,8 @@ void TableFunctionSQLite::parseArguments(const ASTPtr & ast_function, ContextPtr for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - database_path = args[0]->as().value.safeGet(); - remote_table_name = args[1]->as().value.safeGet(); + database_path = checkAndGetLiteralArgument(args[0], "database_path"); + remote_table_name = checkAndGetLiteralArgument(args[1], "table_name"); sqlite_db = openSQLiteDB(database_path, context); } diff --git a/src/TableFunctions/TableFunctionZeros.cpp b/src/TableFunctions/TableFunctionZeros.cpp index fdc8c4ac911..3baa09a65ea 100644 --- a/src/TableFunctions/TableFunctionZeros.cpp +++ b/src/TableFunctions/TableFunctionZeros.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include @@ -55,7 +55,7 @@ void registerTableFunctionZeros(TableFunctionFactory & factory) template UInt64 TableFunctionZeros::evaluateArgument(ContextPtr context, ASTPtr & argument) const { - return evaluateConstantExpressionOrIdentifierAsLiteral(argument, context)->as().value.safeGet(); + return checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(argument, context), "length"); } } diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 45e45b505d4..e84d1645fdf 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -49,6 +49,9 @@ endif() if (TARGET ch_contrib::amqp_cpp) set(USE_AMQPCPP 1) endif() +if (TARGET ch_contrib::nats_io) + set(USE_NATSIO 1) +endif() if (TARGET ch_contrib::cassandra) set(USE_CASSANDRA 1) endif() diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 67e1c6ee85d..f5eb72dddee 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -20,15 +20,17 @@ def get_with_retries( sleep: int = 3, **kwargs, ) -> requests.Response: - logging.info("Getting URL with %i and sleep %i in between: %s", retries, sleep, url) + logging.info( + "Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url + ) exc = None # type: Optional[Exception] - for i in range(DOWNLOAD_RETRIES_COUNT): + for i in range(retries): try: response = requests.get(url, **kwargs) response.raise_for_status() break except Exception as e: - if i + 1 < DOWNLOAD_RETRIES_COUNT: + if i + 1 < retries: logging.info("Exception '%s' while getting, retry %i", e, i + 1) time.sleep(sleep) diff --git a/tests/ci/cherry_pick_utils/cherrypick.py b/tests/ci/cherry_pick_utils/cherrypick.py index 92c87800828..c844beaee88 100644 --- a/tests/ci/cherry_pick_utils/cherrypick.py +++ b/tests/ci/cherry_pick_utils/cherrypick.py @@ -165,7 +165,7 @@ class CherryPick: "user.name=robot-clickhouse", ] - title = (self._pr["title"].replace('"', r"\""),) + title = self._pr["title"].replace('"', r"\"") pr_title = f"Backport #{self._pr['number']} to {self.target_branch}: {title}" self._run(git_prefix + ["checkout", "-f", self.backport_branch]) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index a0d0e124f6d..2e181f678dd 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -210,6 +210,7 @@ def build_and_push_dummy_image( def build_and_push_one_image( image: DockerImage, version_string: str, + additional_cache: str, push: bool, 
child: bool, ) -> Tuple[bool, str]: @@ -232,6 +233,16 @@ def build_and_push_one_image( if child: from_tag_arg = f"--build-arg FROM_TAG={version_string} " + cache_from = ( + f"--cache-from type=registry,ref={image.repo}:{version_string} " + f"--cache-from type=registry,ref={image.repo}:latest" + ) + if additional_cache: + cache_from = ( + f"{cache_from} " + f"--cache-from type=registry,ref={image.repo}:{additional_cache}" + ) + with open(build_log, "wb") as bl: cmd = ( "docker buildx build --builder default " @@ -240,8 +251,7 @@ def build_and_push_one_image( # A hack to invalidate cache, grep for it in docker/ dir f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " f"--tag {image.repo}:{version_string} " - f"--cache-from type=registry,ref={image.repo}:{version_string} " - f"--cache-from type=registry,ref={image.repo}:latest " + f"{cache_from} " f"--cache-to type=inline,mode=max " f"{push_arg}" f"--progress plain {image.full_path}" @@ -260,6 +270,7 @@ def build_and_push_one_image( def process_single_image( image: DockerImage, versions: List[str], + additional_cache, push: bool, child: bool, ) -> List[Tuple[str, str, str]]: @@ -267,7 +278,9 @@ def process_single_image( result = [] for ver in versions: for i in range(5): - success, build_log = build_and_push_one_image(image, ver, push, child) + success, build_log = build_and_push_one_image( + image, ver, additional_cache, push, child + ) if success: result.append((image.repo + ":" + ver, build_log, "OK")) break @@ -284,17 +297,23 @@ def process_single_image( def process_image_with_parents( - image: DockerImage, versions: List[str], push: bool, child: bool = False + image: DockerImage, + versions: List[str], + additional_cache: str, + push: bool, + child: bool = False, ) -> List[Tuple[str, str, str]]: result = [] # type: List[Tuple[str,str,str]] if image.built: return result if image.parent is not None: - result += process_image_with_parents(image.parent, versions, push, False) + result += process_image_with_parents( + image.parent, versions, additional_cache, push, False + ) child = True - result += process_single_image(image, versions, push, child) + result += process_single_image(image, versions, additional_cache, push, child) return result @@ -423,8 +442,10 @@ def main(): result_images = {} images_processing_result = [] for image in changed_images: + # If we are in backport PR, then pr_info.release_pr is defined + # We use it as tag to reduce rebuilding time images_processing_result += process_image_with_parents( - image, image_versions, args.push + image, image_versions, pr_info.release_pr, args.push ) result_images[image.repo] = result_version diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 550d495939c..32df6d5f1d0 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -117,7 +117,7 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.return_value.__enter__.return_value.wait.return_value = 0 image = di.DockerImage("path", "name", False, gh_repo_path="") - result, _ = di.build_and_push_one_image(image, "version", True, True) + result, _ = di.build_and_push_one_image(image, "version", "", True, True) mock_open.assert_called_once() mock_popen.assert_called_once() mock_machine.assert_not_called() @@ -136,7 +136,7 @@ class TestDockerImageCheck(unittest.TestCase): mock_machine.reset_mock() mock_popen.return_value.__enter__.return_value.wait.return_value = 0 - result, _ = di.build_and_push_one_image(image, "version2", False, True) + result, _ = di.build_and_push_one_image(image, "version2", "", False, 
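# A condensed sketch of the cache handling above: the --cache-from list always contains
# the image's own version tag and "latest", and an optional extra tag (for example the
# release PR number of a backport) is appended when provided. Repository and tag names
# below are illustrative.
def build_cache_from_args(repo: str, version: str, additional_cache: str = "") -> str:
    cache_from = (
        f"--cache-from type=registry,ref={repo}:{version} "
        f"--cache-from type=registry,ref={repo}:latest"
    )
    if additional_cache:
        cache_from = (
            f"{cache_from} "
            f"--cache-from type=registry,ref={repo}:{additional_cache}"
        )
    return cache_from


print(build_cache_from_args("clickhouse/test-base", "version2", "cached-version"))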
True) mock_open.assert_called_once() mock_popen.assert_called_once() mock_machine.assert_not_called() @@ -155,7 +155,7 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.reset_mock() mock_machine.reset_mock() mock_popen.return_value.__enter__.return_value.wait.return_value = 1 - result, _ = di.build_and_push_one_image(image, "version2", False, False) + result, _ = di.build_and_push_one_image(image, "version2", "", False, False) mock_open.assert_called_once() mock_popen.assert_called_once() mock_machine.assert_not_called() @@ -169,13 +169,36 @@ class TestDockerImageCheck(unittest.TestCase): ) self.assertFalse(result) + mock_open.reset_mock() + mock_popen.reset_mock() + mock_machine.reset_mock() + mock_popen.return_value.__enter__.return_value.wait.return_value = 1 + result, _ = di.build_and_push_one_image( + image, "version2", "cached-version", False, False + ) + mock_open.assert_called_once() + mock_popen.assert_called_once() + mock_machine.assert_not_called() + self.assertIn( + f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " + "--tag name:version2 --cache-from type=registry,ref=name:version2 " + "--cache-from type=registry,ref=name:latest " + "--cache-from type=registry,ref=name:cached-version " + "--cache-to type=inline,mode=max --progress plain path", + mock_popen.call_args.args, + ) + self.assertFalse(result) + mock_open.reset_mock() mock_popen.reset_mock() mock_machine.reset_mock() only_amd64_image = di.DockerImage("path", "name", True) mock_popen.return_value.__enter__.return_value.wait.return_value = 0 - result, _ = di.build_and_push_one_image(only_amd64_image, "version", True, True) + result, _ = di.build_and_push_one_image( + only_amd64_image, "version", "", True, True + ) mock_open.assert_called_once() mock_popen.assert_called_once() mock_machine.assert_called_once() @@ -186,7 +209,7 @@ class TestDockerImageCheck(unittest.TestCase): ) self.assertTrue(result) result, _ = di.build_and_push_one_image( - only_amd64_image, "version", False, True + only_amd64_image, "version", "", False, True ) self.assertIn( "docker pull ubuntu:20.04; docker tag ubuntu:20.04 name:version; ", @@ -195,7 +218,7 @@ class TestDockerImageCheck(unittest.TestCase): @patch("docker_images_check.build_and_push_one_image") def test_process_image_with_parents(self, mock_build): - mock_build.side_effect = lambda w, x, y, z: (True, f"{w.repo}_{x}.log") + mock_build.side_effect = lambda v, w, x, y, z: (True, f"{v.repo}_{w}.log") im1 = di.DockerImage("path1", "repo1", False) im2 = di.DockerImage("path2", "repo2", False, im1) im3 = di.DockerImage("path3", "repo3", False, im2) @@ -203,7 +226,7 @@ class TestDockerImageCheck(unittest.TestCase): # We use list to have determined order of image builgings images = [im4, im1, im3, im2, im1] results = [ - di.process_image_with_parents(im, ["v1", "v2", "latest"], True) + di.process_image_with_parents(im, ["v1", "v2", "latest"], "", True) for im in images ] diff --git a/tests/ci/download_previous_release.py b/tests/ci/download_previous_release.py index fa03d164f23..86beed35b5a 100755 --- a/tests/ci/download_previous_release.py +++ b/tests/ci/download_previous_release.py @@ -4,25 +4,24 @@ import re import os import logging -import requests +import requests # type: ignore -from requests.adapters import HTTPAdapter -from urllib3.util.retry import Retry +from requests.adapters import HTTPAdapter # type: ignore +from urllib3.util.retry import Retry # type: ignore CLICKHOUSE_TAGS_URL = 
"https://api.github.com/repos/ClickHouse/ClickHouse/tags" -CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" -CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static-dbg_{version}_amd64.deb" -CLICKHOUSE_SERVER_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-server_{version}_all.deb" -CLICKHOUSE_CLIENT_DOWNLOAD_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-client_{version}_all.deb" - - +DOWNLOAD_PREFIX = ( + "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/" +) CLICKHOUSE_COMMON_STATIC_PACKET_NAME = "clickhouse-common-static_{version}_amd64.deb" CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME = ( "clickhouse-common-static-dbg_{version}_amd64.deb" ) -CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_all.deb" -CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_all.deb" +CLICKHOUSE_SERVER_PACKET_NAME = "clickhouse-server_{version}_amd64.deb" +CLICKHOUSE_SERVER_PACKET_FALLBACK = "clickhouse-server_{version}_all.deb" +CLICKHOUSE_CLIENT_PACKET_NAME = "clickhouse-client_{version}_amd64.deb" +CLICKHOUSE_CLIENT_PACKET_FALLBACK = "clickhouse-client_{version}_all.deb" PACKETS_DIR = "previous_release_package_folder/" VERSION_PATTERN = r"((?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" @@ -102,9 +101,10 @@ def download_packet(url, out_path, retries=10, backoff_factor=0.3): session.mount("http://", adapter) session.mount("https://", adapter) response = session.get(url) - print(url) - if response.ok: - open(out_path, "wb").write(response.content) + response.raise_for_status() + print(f"Download {url} to {out_path}") + with open(out_path, "wb") as fd: + fd.write(response.content) def download_packets(release, dest_path=PACKETS_DIR): @@ -113,43 +113,31 @@ def download_packets(release, dest_path=PACKETS_DIR): logging.info("Will download %s", release) - download_packet( - CLICKHOUSE_COMMON_STATIC_DOWNLOAD_URL.format( - version=release.version, type=release.type - ), - out_path=os.path.join( - dest_path, - CLICKHOUSE_COMMON_STATIC_PACKET_NAME.format(version=release.version), - ), - ) + def get_dest_path(pkg_name): + return os.path.join(dest_path, pkg_name) - download_packet( - CLICKHOUSE_COMMON_STATIC_DBG_DOWNLOAD_URL.format( - version=release.version, type=release.type - ), - out_path=os.path.join( - dest_path, - CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME.format(version=release.version), - ), - ) + for pkg in ( + CLICKHOUSE_COMMON_STATIC_PACKET_NAME, + CLICKHOUSE_COMMON_STATIC_DBG_PACKET_NAME, + ): + url = (DOWNLOAD_PREFIX + pkg).format(version=release.version, type=release.type) + pkg_name = get_dest_path(pkg.format(version=release.version)) + download_packet(url, pkg_name) - download_packet( - CLICKHOUSE_SERVER_DOWNLOAD_URL.format( - version=release.version, type=release.type - ), - out_path=os.path.join( - dest_path, CLICKHOUSE_SERVER_PACKET_NAME.format(version=release.version) - ), - ) - - download_packet( - CLICKHOUSE_CLIENT_DOWNLOAD_URL.format( - version=release.version, type=release.type - ), - out_path=os.path.join( - dest_path, CLICKHOUSE_CLIENT_PACKET_NAME.format(version=release.version) - ), - ) + for pkg, fallback in ( + (CLICKHOUSE_SERVER_PACKET_NAME, CLICKHOUSE_SERVER_PACKET_FALLBACK), + (CLICKHOUSE_CLIENT_PACKET_NAME, 
CLICKHOUSE_CLIENT_PACKET_FALLBACK), + ): + url = (DOWNLOAD_PREFIX + pkg).format(version=release.version, type=release.type) + pkg_name = get_dest_path(pkg.format(version=release.version)) + try: + download_packet(url, pkg_name) + except Exception: + url = (DOWNLOAD_PREFIX + fallback).format( + version=release.version, type=release.type + ) + pkg_name = get_dest_path(fallback.format(version=release.version)) + download_packet(url, pkg_name) def download_previous_release(dest_path): diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index e206b8ed7b4..4b7c100c300 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -2,6 +2,7 @@ import json import logging import os +from typing import Set from unidiff import PatchSet # type: ignore @@ -81,8 +82,10 @@ class PRInfo: else: github_event = PRInfo.default_event.copy() self.event = github_event - self.changed_files = set() + self.changed_files = set() # type: Set[str] self.body = "" + self.diff_urls = [] + self.release_pr = "" ref = github_event.get("ref", "refs/head/master") if ref and ref.startswith("refs/heads/"): ref = ref[11:] @@ -148,7 +151,7 @@ class PRInfo: response_json = user_orgs_response.json() self.user_orgs = set(org["id"] for org in response_json) - self.diff_url = github_event["pull_request"]["diff_url"] + self.diff_urls.append(github_event["pull_request"]["diff_url"]) elif "commits" in github_event: self.sha = github_event["after"] pull_request = get_pr_for_commit(self.sha, github_event["ref"]) @@ -165,7 +168,7 @@ class PRInfo: self.base_name = self.repo_full_name self.head_ref = ref self.head_name = self.repo_full_name - self.diff_url = ( + self.diff_urls.append( f"https://api.github.com/repos/{GITHUB_REPOSITORY}/" f"compare/{github_event['before']}...{self.sha}" ) @@ -179,12 +182,31 @@ class PRInfo: self.head_name = pull_request["head"]["repo"]["full_name"] self.pr_html_url = pull_request["html_url"] if "pr-backport" in self.labels: - self.diff_url = ( + # head1...head2 gives changes in head2 since merge base + # Thag's why we need {self.head_ref}...master to get + # files changed in upstream AND master...{self.head_ref} + # to get files, changed in current HEAD + self.diff_urls.append( f"https://github.com/{GITHUB_REPOSITORY}/" f"compare/master...{self.head_ref}.diff" ) + self.diff_urls.append( + f"https://github.com/{GITHUB_REPOSITORY}/" + f"compare/{self.head_ref}...master.diff" + ) + # Get release PR number. 
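To make the triple-dot semantics in the comment above concrete, here is a minimal standalone sketch of the two compare URLs collected for a backport PR; the repository and branch names are hypothetical, and in the real code head_ref comes from the pull request event:

# Hypothetical values, for illustration only.
GITHUB_REPOSITORY = "ClickHouse/ClickHouse"
head_ref = "backport/22.6/12345"

diff_urls = [
    # master...head: commits on the backport branch but not on master,
    # i.e. the files changed by the backport itself (current HEAD).
    f"https://github.com/{GITHUB_REPOSITORY}/compare/master...{head_ref}.diff",
    # head...master: commits that reached master after the branch point,
    # i.e. the files changed upstream in the meantime.
    f"https://github.com/{GITHUB_REPOSITORY}/compare/{head_ref}...master.diff",
]
print("\n".join(diff_urls))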
+ self.release_pr = get_pr_for_commit(self.base_ref, self.base_ref)[ + "number" + ] else: - self.diff_url = pull_request["diff_url"] + self.diff_urls.append(pull_request["diff_url"]) + if "release" in self.labels: + # For release PRs we must get not only files changed in the PR + # itself, but as well files changed since we branched out + self.diff_urls.append( + f"https://github.com/{GITHUB_REPOSITORY}/" + f"compare/{self.head_ref}...master.diff" + ) else: print("event.json does not match pull_request or push:") print(json.dumps(github_event, sort_keys=True, indent=4)) @@ -205,23 +227,24 @@ class PRInfo: self.fetch_changed_files() def fetch_changed_files(self): - if not getattr(self, "diff_url", False): - raise TypeError("The event does not have diff URL") + if not getattr(self, "diff_urls", False): + raise TypeError("The event does not have diff URLs") - response = get_with_retries( - self.diff_url, - sleep=RETRY_SLEEP, - ) - response.raise_for_status() - if "commits" in self.event and self.number == 0: - diff = response.json() + for diff_url in self.diff_urls: + response = get_with_retries( + diff_url, + sleep=RETRY_SLEEP, + ) + response.raise_for_status() + if "commits" in self.event and self.number == 0: + diff = response.json() - if "files" in diff: - self.changed_files = [f["filename"] for f in diff["files"]] - else: - diff_object = PatchSet(response.text) - self.changed_files = {f.path for f in diff_object} - print("Fetched info about %d changed files" % len(self.changed_files)) + if "files" in diff: + self.changed_files = {f["filename"] for f in diff["files"]} + else: + diff_object = PatchSet(response.text) + self.changed_files.update({f.path for f in diff_object}) + print(f"Fetched info about {len(self.changed_files)} changed files") def get_dict(self): return { diff --git a/tests/ci/push_to_artifactory.py b/tests/ci/push_to_artifactory.py index be977bdd907..98de315ddae 100755 --- a/tests/ci/push_to_artifactory.py +++ b/tests/ci/push_to_artifactory.py @@ -4,11 +4,12 @@ import argparse import logging import os import re -from typing import List, Tuple +from collections import namedtuple +from typing import Dict, List, Tuple from artifactory import ArtifactorySaaSPath # type: ignore from build_download_helper import dowload_build_with_progress -from env_helper import RUNNER_TEMP +from env_helper import RUNNER_TEMP, S3_BUILDS_BUCKET from git_helper import TAG_REGEXP, commit, removeprefix, removesuffix @@ -25,88 +26,144 @@ TEMP_PATH = os.path.join(RUNNER_TEMP, "push_to_artifactory") JFROG_API_KEY = getenv("JFROG_API_KEY", "") JFROG_TOKEN = getenv("JFROG_TOKEN", "") +CheckDesc = namedtuple("CheckDesc", ("check_name", "deb_arch", "rpm_arch")) + class Packages: - rpm_arch = dict(all="noarch", amd64="x86_64") + checks = ( + CheckDesc("package_release", "amd64", "x86_64"), + CheckDesc("package_aarch64", "arm64", "aarch64"), + ) packages = ( - ("clickhouse-client", "all"), - ("clickhouse-common-static", "amd64"), - ("clickhouse-common-static-dbg", "amd64"), - ("clickhouse-server", "all"), + "clickhouse-client", + "clickhouse-common-static", + "clickhouse-common-static-dbg", + "clickhouse-server", ) def __init__(self, version: str): - self.deb = tuple( - "_".join((name, version, arch + ".deb")) for name, arch in self.packages - ) + # Dicts of name: s3_path_suffix + self.deb = {} # type: Dict[str, str] + self.rpm = {} # type: Dict[str, str] + self.tgz = {} # type: Dict[str, str] + for check in self.checks: + for name in self.packages: + deb = f"{name}_{version}_{check.deb_arch}.deb" + 
self.deb[deb] = f"{check.check_name}/{deb}" - self.rpm = tuple( - "-".join((name, version + "." + self.rpm_arch[arch] + ".rpm")) - for name, arch in self.packages - ) + rpm = f"{name}-{version}.{check.rpm_arch}.rpm" + self.rpm[rpm] = f"{check.check_name}/{rpm}" - self.tgz = tuple(f"{name}-{version}-amd64.tgz" for name, _ in self.packages) + tgz = f"{name}-{version}-{check.deb_arch}.tgz" + self.tgz[tgz] = f"{check.check_name}/{tgz}" def arch(self, deb_pkg: str) -> str: if deb_pkg not in self.deb: raise ValueError(f"{deb_pkg} not in {self.deb}") return removesuffix(deb_pkg, ".deb").split("_")[-1] + def replace_with_fallback(self, name: str): + if name.endswith(".deb"): + suffix = self.deb.pop(name) + self.deb[self.fallback_to_all(name)] = self.fallback_to_all(suffix) + elif name.endswith(".rpm"): + suffix = self.rpm.pop(name) + self.rpm[self.fallback_to_all(name)] = self.fallback_to_all(suffix) + elif name.endswith(".tgz"): + suffix = self.tgz.pop(name) + self.tgz[self.fallback_to_all(name)] = self.fallback_to_all(suffix) + else: + raise KeyError(f"unknown package type for {name}") + @staticmethod def path(package_file: str) -> str: return os.path.join(TEMP_PATH, package_file) + @staticmethod + def fallback_to_all(url_or_name: str): + """Until July 2022 we had clickhouse-server and clickhouse-client with + arch 'all'""" + # deb + if url_or_name.endswith("amd64.deb") or url_or_name.endswith("arm64.deb"): + return f"{url_or_name[:-9]}all.deb" + # rpm + if url_or_name.endswith("x86_64.rpm") or url_or_name.endswith("aarch64.rpm"): + new = removesuffix(removesuffix(url_or_name, "x86_64.rpm"), "aarch64.rpm") + return f"{new}noarch.rpm" + # tgz + if url_or_name.endswith("-amd64.tgz") or url_or_name.endswith("-arm64.tgz"): + return f"{url_or_name[:-10]}.tgz" + return url_or_name + class S3: template = ( "https://s3.amazonaws.com/" # "clickhouse-builds/" - "{bucket_name}/" + f"{S3_BUILDS_BUCKET}/" # "33333/" or "21.11/" from --release, if pull request is omitted "{pr}/" # "2bef313f75e4cacc6ea2ef2133e8849ecf0385ec/" "{commit}/" - # "package_release/" - "{check_name}/" - # "clickhouse-common-static_21.11.5.0_amd64.deb" - "{package}" + # "package_release/clickhouse-common-static_21.11.5.0_amd64.deb" + "{s3_path_suffix}" ) def __init__( self, - bucket_name: str, pr: int, commit: str, - check_name: str, version: str, force_download: bool, ): self._common = dict( - bucket_name=bucket_name, pr=pr, commit=commit, - check_name=check_name, ) self.force_download = force_download self.packages = Packages(version) - def download_package(self, package_file: str): - if not self.force_download and os.path.exists(Packages.path(package_file)): + def download_package(self, package_file: str, s3_path_suffix: str): + path = Packages.path(package_file) + fallback_path = Packages.fallback_to_all(path) + if not self.force_download and ( + os.path.exists(path) or os.path.exists(fallback_path) + ): + if os.path.exists(fallback_path): + self.packages.replace_with_fallback(package_file) + return - url = self.template.format_map({**self._common, "package": package_file}) - dowload_build_with_progress(url, Packages.path(package_file)) + url = self.template.format_map( + {**self._common, "s3_path_suffix": s3_path_suffix} + ) + try: + dowload_build_with_progress(url, path) + except Exception as e: + if "Cannot download dataset from" in e.args[0]: + new_url = Packages.fallback_to_all(url) + logging.warning( + "Fallback downloading %s for old release", fallback_path + ) + dowload_build_with_progress(new_url, fallback_path) + 
self.packages.replace_with_fallback(package_file) def download_deb(self): - for package_file in self.packages.deb: - self.download_package(package_file) + # Copy to have a way to pop/add fallback packages + packages = self.packages.deb.copy() + for package_file, s3_path_suffix in packages.items(): + self.download_package(package_file, s3_path_suffix) def download_rpm(self): - for package_file in self.packages.rpm: - self.download_package(package_file) + # Copy to have a way to pop/add fallback packages + packages = self.packages.rpm.copy() + for package_file, s3_path_suffix in packages.items(): + self.download_package(package_file, s3_path_suffix) def download_tgz(self): - for package_file in self.packages.tgz: - self.download_package(package_file) + # Copy to have a way to pop/add fallback packages + packages = self.packages.tgz.copy() + for package_file, s3_path_suffix in packages.items(): + self.download_package(package_file, s3_path_suffix) class Release: @@ -223,17 +280,6 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--commit", required=True, type=commit, help="commit hash for S3 bucket" ) - parser.add_argument( - "--bucket-name", - default="clickhouse-builds", - help="AWS S3 bucket name", - ) - parser.add_argument( - "--check-name", - default="package_release", - help="check name, a part of bucket path, " - "will be converted to lower case with spaces->underscore", - ) parser.add_argument( "--all", action="store_true", help="implies all deb, rpm and tgz" ) @@ -276,7 +322,6 @@ def parse_args() -> argparse.Namespace: args.deb = args.rpm = args.tgz = True if not (args.deb or args.rpm or args.tgz): parser.error("at least one of --deb, --rpm or --tgz should be specified") - args.check_name = args.check_name.lower().replace(" ", "_") if args.pull_request == 0: args.pull_request = ".".join(args.release.version_parts[:2]) return args @@ -305,10 +350,8 @@ def main(): args = parse_args() os.makedirs(TEMP_PATH, exist_ok=True) s3 = S3( - args.bucket_name, args.pull_request, args.commit, - args.check_name, args.release.version, args.force_download, ) diff --git a/tests/ci/team_keys_lambda/.gitignore b/tests/ci/team_keys_lambda/.gitignore new file mode 100644 index 00000000000..4c845d295ee --- /dev/null +++ b/tests/ci/team_keys_lambda/.gitignore @@ -0,0 +1,2 @@ +lambda-venv +lambda-package.zip diff --git a/tests/ci/team_keys_lambda/Dockerfile b/tests/ci/team_keys_lambda/Dockerfile deleted file mode 100644 index 0d50224c51d..00000000000 --- a/tests/ci/team_keys_lambda/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM public.ecr.aws/lambda/python:3.9 - -# Install the function's dependencies using file requirements.txt -# from your project folder. - -COPY requirements.txt . 
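Assuming it runs next to the Packages class above, a minimal sketch of the arch-to-'all' fallback mapping, with purely hypothetical package names and version:

# Hypothetical file names; version 22.6.1.1 is an example only. Expected mapping:
#   clickhouse-server_22.6.1.1_amd64.deb  -> clickhouse-server_22.6.1.1_all.deb
#   clickhouse-client-22.6.1.1.x86_64.rpm -> clickhouse-client-22.6.1.1.noarch.rpm
#   clickhouse-client-22.6.1.1-arm64.tgz  -> clickhouse-client-22.6.1.1.tgz
for name in (
    "clickhouse-server_22.6.1.1_amd64.deb",
    "clickhouse-client-22.6.1.1.x86_64.rpm",
    "clickhouse-client-22.6.1.1-arm64.tgz",
):
    print(name, "->", Packages.fallback_to_all(name))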
-RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" - -# Copy function code -COPY app.py ${LAMBDA_TASK_ROOT} - -# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) -CMD [ "app.handler" ] diff --git a/tests/ci/team_keys_lambda/app.py b/tests/ci/team_keys_lambda/app.py index 50ef753001c..9e73a3f0993 100644 --- a/tests/ci/team_keys_lambda/app.py +++ b/tests/ci/team_keys_lambda/app.py @@ -3,13 +3,43 @@ import argparse import json -from threading import Thread +from datetime import datetime from queue import Queue +from threading import Thread import requests # type: ignore +import boto3 # type: ignore -def get_org_team_members(token: str, org: str, team_slug: str) -> tuple: +class Keys(set): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.updated_at = 0 + + def update_now(self): + self.updated_at = datetime.now().timestamp() + + +keys = Keys() + + +class Worker(Thread): + def __init__(self, request_queue): + Thread.__init__(self) + self.queue = request_queue + self.results = set() + + def run(self): + while True: + m = self.queue.get() + if m == "": + break + response = requests.get(f"https://github.com/{m}.keys") + self.results.add(f"# {m}\n{response.text}\n") + self.queue.task_done() + + +def get_org_team_members(token: str, org: str, team_slug: str) -> set: headers = { "Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json", @@ -19,24 +49,12 @@ def get_org_team_members(token: str, org: str, team_slug: str) -> tuple: ) response.raise_for_status() data = response.json() - return tuple(m["login"] for m in data) + return set(m["login"] for m in data) -def get_members_keys(members: tuple) -> str: - class Worker(Thread): - def __init__(self, request_queue): - Thread.__init__(self) - self.queue = request_queue - self.results = [] - - def run(self): - while True: - m = self.queue.get() - if m == "": - break - response = requests.get(f"https://github.com/{m}.keys") - self.results.append(f"# {m}\n{response.text}") - self.queue.task_done() +def get_cached_members_keys(members: set) -> Keys: + if (datetime.now().timestamp() - 3600) <= keys.updated_at: + return keys q = Queue() # type: Queue workers = [] @@ -55,15 +73,14 @@ def get_members_keys(members: tuple) -> str: for worker in workers: worker.join() - responses = [] + keys.clear() for worker in workers: - responses.extend(worker.results) - return "".join(responses) + keys.update(worker.results) + keys.update_now() + return keys def get_token_from_aws() -> str: - import boto3 # type: ignore - secret_name = "clickhouse_robot_token" session = boto3.session.Session() client = session.client( @@ -76,21 +93,26 @@ def get_token_from_aws() -> str: def main(token: str, org: str, team_slug: str) -> str: members = get_org_team_members(token, org, team_slug) - keys = get_members_keys(members) + keys = get_cached_members_keys(members) - return keys + return "".join(sorted(keys)) def handler(event, context): _ = context _ = event - token = get_token_from_aws() + if keys.updated_at < (datetime.now().timestamp() - 3600): + token = get_token_from_aws() + body = main(token, "ClickHouse", "core") + else: + body = "".join(sorted(keys)) + result = { "statusCode": 200, "headers": { "Content-Type": "text/html", }, - "body": main(token, "ClickHouse", "core"), + "body": body, } return result @@ -106,6 +128,6 @@ if __name__ == "__main__": parser.add_argument("--team", help="GitHub team name", default="core") args = parser.parse_args() - keys = 
main(args.token, args.organization, args.team) + output = main(args.token, args.organization, args.team) - print(f"# Just shoing off the keys:\n{keys}") + print(f"# Just shoing off the keys:\n{output}") diff --git a/tests/ci/team_keys_lambda/build_and_deploy_archive.sh b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh new file mode 100644 index 00000000000..7b89ea11ede --- /dev/null +++ b/tests/ci/team_keys_lambda/build_and_deploy_archive.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -xeo pipefail + +VENV=lambda-venv +py_exec=$(which python3) +py_version=$(basename "$(readlink -f "$py_exec")") +rm -rf "$VENV" lambda-package.zip +virtualenv "$VENV" +source "$VENV/bin/activate" +pip install -r requirements.txt +PACKAGES="$VENV/lib/$py_version/site-packages" +cp app.py "$PACKAGES/" +( cd "$PACKAGES" && zip -r ../../../../lambda-package.zip . ) + +aws lambda update-function-code --function-name team-keys-lambda --zip-file fileb://lambda-package.zip diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 6838d925500..74ad4be2547 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -14,14 +14,77 @@ export RUNNER_HOME=/home/ubuntu/actions-runner export RUNNER_URL="https://github.com/ClickHouse" # Funny fact, but metadata service has fixed IP -INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) +INSTANCE_ID=$(ec2metadata --instance-id) export INSTANCE_ID # combine labels -RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" | jq '.Tags[] | select(."Key" == "github:runner-type") | .Value' -r) +RUNNER_TYPE=$(/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values=$INSTANCE_ID" --query "Tags[?Key=='github:runner-type'].Value" --output text) LABELS="self-hosted,Linux,$(uname -m),$RUNNER_TYPE" export LABELS +# Refresh CloudWatch agent config +aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json +systemctl restart amazon-cloudwatch-agent.service + +# Refresh teams ssh keys +TEAM_KEYS_URL=$(aws ssm get-parameter --region us-east-1 --name team-keys-url --query 'Parameter.Value' --output=text) +curl "${TEAM_KEYS_URL}" > /home/ubuntu/.ssh/authorized_keys2 +chown ubuntu: /home/ubuntu/.ssh -R + + +# Create a pre-run script that will restart docker daemon before the job started +mkdir -p /tmp/actions-hooks +cat > /tmp/actions-hooks/pre-run.sh << 'EOF' +#!/bin/bash +set -xuo pipefail + +echo "Runner's public DNS: $(ec2metadata --public-hostname)" +EOF + +cat > /tmp/actions-hooks/post-run.sh << 'EOF' +#!/bin/bash +set -xuo pipefail + +terminate-and-exit() { + echo "Going to terminate the runner" + INSTANCE_ID=$(ec2metadata --instance-id) + # We execute it with at to not have it as an orphan process + # GH Runners kill all remain processes + echo "sleep 10; aws ec2 terminate-instances --instance-ids $INSTANCE_ID" | at now + exit 0 +} + +# Free KiB, free percents +ROOT_STAT=($(df / | awk '/\// {print $4 " " int($4/$2 * 100)}')) +if [[ ${ROOT_STAT[0]} -lt 3000000 ]] || [[ ${ROOT_STAT[1]} -lt 5 ]]; then + echo "The runner has ${ROOT_STAT[0]}KiB and ${ROOT_STAT[1]}% of free space on /" + terminate-and-exit +fi + +# shellcheck disable=SC2046 +docker kill $(docker ps -q) ||: +# shellcheck disable=SC2046 +docker rm -f $(docker ps -a -q) ||: + +# If we have hanged containers after the previous commands, than we have a hanged one +# and 
should restart the daemon +if [ "$(docker ps -a -q)" ]; then + # Systemd service of docker has StartLimitBurst=3 and StartLimitInterval=60s, + # that's why we try restarting it for long + for i in {1..25}; + do + sudo systemctl restart docker && break || sleep 5 + done + + for i in {1..10} + do + docker info && break || sleep 2 + done + # Last chance, otherwise we have to terminate poor instance + docker info 1>/dev/null || { echo Docker unable to start; terminate-and-exit; } +fi +EOF + while true; do runner_pid=$(pgrep run.sh) echo "Got runner pid $runner_pid" @@ -38,7 +101,10 @@ while true; do sudo -u ubuntu ./config.sh --url $RUNNER_URL --token "$RUNNER_TOKEN" --name "$INSTANCE_ID" --runnergroup Default --labels "$LABELS" --work _work echo "Run" - sudo -u ubuntu ./run.sh & + sudo -u ubuntu \ + ACTIONS_RUNNER_HOOK_JOB_STARTED=/tmp/actions-hooks/pre-run.sh \ + ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/tmp/actions-hooks/post-run.sh \ + ./run.sh & sleep 15 else echo "Runner is working with pid $runner_pid, nothing to do" diff --git a/tests/ci/worker/ubuntu_ami_for_ci.sh b/tests/ci/worker/ubuntu_ami_for_ci.sh index 23d3b18c810..c5bc090d8d8 100644 --- a/tests/ci/worker/ubuntu_ami_for_ci.sh +++ b/tests/ci/worker/ubuntu_ami_for_ci.sh @@ -3,7 +3,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.285.1 +export RUNNER_VERSION=2.293.0 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { @@ -28,6 +28,7 @@ apt-get update apt-get install --yes --no-install-recommends \ apt-transport-https \ + atop \ binfmt-support \ build-essential \ ca-certificates \ @@ -56,6 +57,11 @@ cat < /etc/docker/daemon.json { "ipv6": true, "fixed-cidr-v6": "2001:db8:1::/64", + "log-driver": "json-file", + "log-opts": { + "max-file": "5", + "max-size": "1000m" + }, "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"], "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] } @@ -92,7 +98,15 @@ rm -rf /home/ubuntu/awscliv2.zip /home/ubuntu/aws mkdir -p /home/ubuntu/.ssh # ~/.ssh/authorized_keys is cleaned out, so we use deprecated but working ~/.ssh/authorized_keys2 -aws lambda invoke --region us-east-1 --function-name team-keys-lambda /tmp/core.keys -jq < /tmp/core.keys -r '.body' > /home/ubuntu/.ssh/authorized_keys2 +TEAM_KEYS_URL=$(aws ssm get-parameter --region us-east-1 --name team-keys-url --query 'Parameter.Value' --output=text) +curl "${TEAM_KEYS_URL}" > /home/ubuntu/.ssh/authorized_keys2 chown ubuntu: /home/ubuntu/.ssh -R chmod 0700 /home/ubuntu/.ssh + +# Download cloudwatch agent and install config for it +wget --directory-prefix=/tmp https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/"$(deb_arch)"/latest/amazon-cloudwatch-agent.deb{,.sig} +gpg --recv-key --keyserver keyserver.ubuntu.com D58167303B789C72 +gpg --verify /tmp/amazon-cloudwatch-agent.deb.sig +dpkg -i /tmp/amazon-cloudwatch-agent.deb +aws ssm get-parameter --region us-east-1 --name AmazonCloudWatch-github-runners --query 'Parameter.Value' --output text > /opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json +systemctl enable amazon-cloudwatch-agent.service diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8744e8bf95b..cab6daf3a50 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -499,6 +499,7 @@ class TestCase: database = testcase_args.database os.environ.setdefault("CLICKHOUSE_DATABASE", database) os.environ.setdefault("CLICKHOUSE_TMP", suite_tmp_dir) + testcase_args.test_tmp_dir = suite_tmp_dir else: # If 
--database is not specified, we will create temporary database with # unique name and we will recreate and drop it for each test @@ -525,10 +526,20 @@ class TestCase: # collisions. testcase_args.test_tmp_dir = os.path.join(suite_tmp_dir, database) os.mkdir(testcase_args.test_tmp_dir) - os.environ.setdefault("CLICKHOUSE_TMP", testcase_args.test_tmp_dir) + os.environ["CLICKHOUSE_TMP"] = testcase_args.test_tmp_dir testcase_args.testcase_database = database + # Printed only in case of failures + # + # NOTE: here we use "CLICKHOUSE_TMP" instead of "file_suffix", + # so it is installed in configure_testcase_args() unlike other files + # (stdout_file, stderr_file) in TestCase::__init__(). + # Since using CLICKHOUSE_TMP is easier to use in expect. + testcase_args.debug_log_file = ( + os.path.join(testcase_args.test_tmp_dir, testcase_basename) + ".debuglog" + ) + return testcase_args def cli_random_settings(self) -> str: @@ -699,7 +710,7 @@ class TestCase: return None - def process_result_impl(self, proc, stdout: str, stderr: str, total_time: float): + def process_result_impl(self, proc, stdout: str, stderr: str, debug_log: str, total_time: float): description = "" if proc: @@ -712,6 +723,9 @@ class TestCase: if stderr: description += stderr + if debug_log: + description += "\n" + description += debug_log return TestResult( self.name, TestStatus.FAIL, @@ -727,6 +741,9 @@ class TestCase: if stderr: description += "\n" description += stderr + if debug_log: + description += "\n" + description += debug_log # Stop on fatal errors like segmentation fault. They are sent to client via logs. if " " in stderr: @@ -757,6 +774,9 @@ class TestCase: if stderr: description += "\n{}\n".format("\n".join(stderr.splitlines()[:100])) description += f"\nstdout:\n{stdout}\n" + if debug_log: + description += "\n" + description += debug_log return TestResult( self.name, TestStatus.FAIL, @@ -767,6 +787,9 @@ class TestCase: if "Exception" in stdout: description += "\n{}\n".format("\n".join(stdout.splitlines()[:100])) + if debug_log: + description += "\n" + description += debug_log return TestResult( self.name, TestStatus.FAIL, @@ -813,6 +836,9 @@ class TestCase: universal_newlines=True, ).communicate()[0] description += f"\n{diff}\n" + if debug_log: + description += "\n" + description += debug_log return TestResult( self.name, TestStatus.FAIL, @@ -826,6 +852,9 @@ class TestCase: and total_time > 60 and "long" not in self.tags ): + if debug_log: + description += "\n" + description += debug_log # We're in Flaky Check mode, check the run time as well while we're at it. 
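The per-test debug log is read with the same binary-safe pattern as stdout and stderr; a minimal standalone sketch, using a hypothetical path (in the harness it is testcase_args.debug_log_file, i.e. <CLICKHOUSE_TMP>/<testcase_basename>.debuglog):

import os

debug_log_file = "/tmp/clickhouse-test/00001_select_1.debuglog"  # hypothetical path

debug_log = ""
if os.path.exists(debug_log_file):
    # Read raw bytes and decode with errors="replace" so a test that wrote
    # non-UTF-8 data cannot break the failure report.
    with open(debug_log_file, "rb") as stream:
        debug_log += debug_log_file + ":\n"
        debug_log += str(stream.read(), errors="replace", encoding="utf-8")
        debug_log += "\n"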
return TestResult( self.name, @@ -839,6 +868,8 @@ class TestCase: os.remove(self.stdout_file) if os.path.exists(self.stderr_file): os.remove(self.stderr_file) + if os.path.exists(self.testcase_args.debug_log_file): + os.remove(self.testcase_args.debug_log_file) return TestResult(self.name, TestStatus.OK, None, total_time, description) @@ -872,7 +903,7 @@ class TestCase: def run_single_test( self, server_logs_level, client_options - ) -> Tuple[Optional[Popen], str, str, float]: + ) -> Tuple[Optional[Popen], str, str, str, float]: args = self.testcase_args client = args.testcase_client start_time = args.testcase_start_time @@ -922,6 +953,13 @@ class TestCase: ) need_drop_database = not maybe_passed + debug_log = "" + if os.path.exists(self.testcase_args.debug_log_file): + with open(self.testcase_args.debug_log_file, "rb") as stream: + debug_log += self.testcase_args.debug_log_file + ":\n" + debug_log += str(stream.read(), errors="replace", encoding="utf-8") + debug_log += "\n" + if need_drop_database: seconds_left = max( args.timeout - (datetime.now() - start_time).total_seconds(), 20 @@ -941,6 +979,7 @@ class TestCase: None, "", f"Timeout dropping database {database} after test", + debug_log, total_time, ) shutil.rmtree(args.test_tmp_dir) @@ -964,12 +1003,13 @@ class TestCase: if os.path.exists(self.stdout_file): with open(self.stdout_file, "rb") as stdfd: stdout = str(stdfd.read(), errors="replace", encoding="utf-8") + stderr = "" if os.path.exists(self.stderr_file): with open(self.stderr_file, "rb") as stdfd: - stderr = str(stdfd.read(), errors="replace", encoding="utf-8") + stderr += str(stdfd.read(), errors="replace", encoding="utf-8") - return proc, stdout, stderr, total_time + return proc, stdout, stderr, debug_log, total_time def run(self, args, suite, client_options, server_logs_level): try: @@ -994,11 +1034,11 @@ class TestCase: args, self.case_file, suite.suite_tmp_path ) client_options = self.add_random_settings(args, client_options) - proc, stdout, stderr, total_time = self.run_single_test( + proc, stdout, stderr, debug_log, total_time = self.run_single_test( server_logs_level, client_options ) - result = self.process_result_impl(proc, stdout, stderr, total_time) + result = self.process_result_impl(proc, stdout, stderr, debug_log, total_time) result.check_if_need_retry(args, stdout, stderr, self.runs_count) if result.status == TestStatus.FAIL: result.description = self.add_info_about_settings( @@ -1594,6 +1634,8 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): queue.close() except Full: + print("Couldn't put test to the queue within timeout. 
Server probably hung.") + print_stacktraces() queue.close() pool.join() diff --git a/tests/integration/README.md b/tests/integration/README.md index ef0b5a4b334..2d44ff70861 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -77,25 +77,25 @@ Notes: You can run tests via `./runner` script and pass pytest arguments as last arg: ``` -$ ./runner --binary $HOME/ClickHouse/programs/clickhouse --bridge-binary $HOME/ClickHouse/programs/clickhouse-odbc-bridge --base-configs-dir $HOME/ClickHouse/programs/server/ 'test_odbc_interaction -ss' +$ ./runner --binary $HOME/ClickHouse/programs/clickhouse --odbc-bridge-binary $HOME/ClickHouse/programs/clickhouse-odbc-bridge --base-configs-dir $HOME/ClickHouse/programs/server/ 'test_ssl_cert_authentication -ss' Start tests -============================= test session starts ============================== -platform linux2 -- Python 2.7.15rc1, pytest-4.0.0, py-1.7.0, pluggy-0.8.0 -rootdir: /ClickHouse/tests/integration, inifile: pytest.ini -collected 6 items +====================================================================================================== test session starts ====================================================================================================== +platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 -- /usr/bin/python3 +cachedir: .pytest_cache +rootdir: /ClickHouse/tests/integration, configfile: pytest.ini +plugins: repeat-0.9.1, xdist-2.5.0, forked-1.4.0, order-1.0.0, timeout-2.1.0 +timeout: 900.0s +timeout method: signal +timeout func_only: False +collected 4 items -test_odbc_interaction/test.py Removing network clickhouse_default -... +test_ssl_cert_authentication/test.py::test_https Copy common default production configuration from /clickhouse-config. Files: config.xml, users.xml +PASSED +test_ssl_cert_authentication/test.py::test_https_wrong_cert PASSED +test_ssl_cert_authentication/test.py::test_https_non_ssl_auth PASSED +test_ssl_cert_authentication/test.py::test_create_user PASSED -Killing roottestodbcinteraction_node1_1 ... done -Killing roottestodbcinteraction_mysql1_1 ... done -Killing roottestodbcinteraction_postgres1_1 ... done -Removing roottestodbcinteraction_node1_1 ... done -Removing roottestodbcinteraction_mysql1_1 ... done -Removing roottestodbcinteraction_postgres1_1 ... done -Removing network roottestodbcinteraction_default - -==================== 6 passed, 1 warnings in 95.21 seconds ===================== +================================================================================================= 4 passed in 118.58s (0:01:58) ================================================================================================= ``` diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0d32547358c..5983c886680 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -22,12 +22,14 @@ try: # Please, add modules that required for specific tests only here. # So contributors will be able to run most tests locally # without installing tons of unneeded packages that may be not so easy to install. 
+ import asyncio from cassandra.policies import RoundRobinPolicy import cassandra.cluster import psycopg2 from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT import pymongo import pymysql + import nats import meilisearch from confluent_kafka.avro.cached_schema_registry_client import ( CachedSchemaRegistryClient, @@ -213,6 +215,11 @@ def check_rabbitmq_is_available(rabbitmq_id): return p.returncode == 0 +async def check_nats_is_available(nats_ip): + nc = await nats.connect("{}:4444".format(nats_ip), user="click", password="house") + return nc.is_connected + + def enable_consistent_hash_plugin(rabbitmq_id): p = subprocess.Popen( ( @@ -336,6 +343,7 @@ class ClickHouseCluster: self.base_kafka_cmd = [] self.base_kerberized_kafka_cmd = [] self.base_rabbitmq_cmd = [] + self.base_nats_cmd = [] self.base_cassandra_cmd = [] self.base_jdbc_bridge_cmd = [] self.base_redis_cmd = [] @@ -352,6 +360,7 @@ class ClickHouseCluster: self.with_kafka = False self.with_kerberized_kafka = False self.with_rabbitmq = False + self.with_nats = False self.with_odbc_drivers = False self.with_hdfs = False self.with_kerberized_hdfs = False @@ -439,6 +448,11 @@ class ClickHouseCluster: self.rabbitmq_dir = p.abspath(p.join(self.instances_dir, "rabbitmq")) self.rabbitmq_logs_dir = os.path.join(self.rabbitmq_dir, "logs") + self.nats_host = "nats1" + self.nats_ip = None + self.nats_port = 4444 + self.nats_docker_id = None + # available when with_nginx == True self.nginx_host = "nginx" self.nginx_ip = None @@ -1012,6 +1026,26 @@ class ClickHouseCluster: ] return self.base_rabbitmq_cmd + def setup_nats_cmd(self, instance, env_variables, docker_compose_yml_dir): + self.with_nats = True + env_variables["NATS_HOST"] = self.nats_host + env_variables["NATS_INTERNAL_PORT"] = "4444" + env_variables["NATS_EXTERNAL_PORT"] = str(self.nats_port) + + self.base_cmd.extend( + ["--file", p.join(docker_compose_yml_dir, "docker_compose_nats.yml")] + ) + self.base_nats_cmd = [ + "docker-compose", + "--env-file", + instance.env_file, + "--project-name", + self.project_name, + "--file", + p.join(docker_compose_yml_dir, "docker_compose_nats.yml"), + ] + return self.base_nats_cmd + def setup_mongo_secure_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_mongo = self.with_mongo_secure = True env_variables["MONGO_HOST"] = self.mongo_host @@ -1202,6 +1236,7 @@ class ClickHouseCluster: with_kafka=False, with_kerberized_kafka=False, with_rabbitmq=False, + with_nats=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, @@ -1291,6 +1326,7 @@ class ClickHouseCluster: with_kafka=with_kafka, with_kerberized_kafka=with_kerberized_kafka, with_rabbitmq=with_rabbitmq, + with_nats=with_nats, with_nginx=with_nginx, with_kerberized_hdfs=with_kerberized_hdfs, with_mongo=with_mongo or with_mongo_secure, @@ -1427,6 +1463,11 @@ class ClickHouseCluster: self.setup_rabbitmq_cmd(instance, env_variables, docker_compose_yml_dir) ) + if with_nats and not self.with_nats: + cmds.append( + self.setup_nats_cmd(instance, env_variables, docker_compose_yml_dir) + ) + if with_nginx and not self.with_nginx: cmds.append( self.setup_nginx_cmd(instance, env_variables, docker_compose_yml_dir) @@ -1875,6 +1916,18 @@ class ClickHouseCluster: raise Exception("Cannot wait RabbitMQ container") return False + def wait_nats_is_available(self, nats_ip, max_retries=5): + retries = 0 + while True: + if asyncio.run(check_nats_is_available(nats_ip)): + break + else: + retries += 1 + if retries > max_retries: + raise Exception("NATS is not 
available") + logging.debug("Waiting for NATS to start up") + time.sleep(1) + def wait_nginx_to_start(self, timeout=60): self.nginx_ip = self.get_instance_ip(self.nginx_host) start = time.time() @@ -2347,6 +2400,14 @@ class ClickHouseCluster: if self.wait_rabbitmq_to_start(throw=(i == 4)): break + if self.with_nats and self.base_nats_cmd: + logging.debug("Setup NATS") + subprocess_check_call(self.base_nats_cmd + common_opts) + self.nats_docker_id = self.get_instance_docker_id("nats1") + self.up_called = True + self.nats_ip = self.get_instance_ip("nats1") + self.wait_nats_is_available(self.nats_ip) + if self.with_hdfs and self.base_hdfs_cmd: logging.debug("Setup HDFS") os.makedirs(self.hdfs_logs_dir) @@ -2708,6 +2769,7 @@ class ClickHouseInstance: with_kafka, with_kerberized_kafka, with_rabbitmq, + with_nats, with_nginx, with_kerberized_hdfs, with_mongo, @@ -2789,6 +2851,7 @@ class ClickHouseInstance: self.with_kafka = with_kafka self.with_kerberized_kafka = with_kerberized_kafka self.with_rabbitmq = with_rabbitmq + self.with_nats = with_nats self.with_nginx = with_nginx self.with_kerberized_hdfs = with_kerberized_hdfs self.with_mongo = with_mongo @@ -3771,6 +3834,9 @@ class ClickHouseInstance: if self.with_rabbitmq: depends_on.append("rabbitmq1") + if self.with_nats: + depends_on.append("nats1") + if self.with_zookeeper: depends_on.append("zoo1") depends_on.append("zoo2") diff --git a/tests/integration/test_access_control_on_cluster/test.py b/tests/integration/test_access_control_on_cluster/test.py index 6c2331178e0..db76233a35f 100644 --- a/tests/integration/test_access_control_on_cluster/test.py +++ b/tests/integration/test_access_control_on_cluster/test.py @@ -49,3 +49,13 @@ def test_access_control_on_cluster(): assert "There is no user `Alex`" in ch1.query_and_get_error("SHOW CREATE USER Alex") assert "There is no user `Alex`" in ch2.query_and_get_error("SHOW CREATE USER Alex") assert "There is no user `Alex`" in ch3.query_and_get_error("SHOW CREATE USER Alex") + + +def test_grant_all_on_cluster(): + ch1.query("CREATE USER IF NOT EXISTS Alex ON CLUSTER 'cluster'") + ch1.query("GRANT ALL ON *.* TO Alex ON CLUSTER 'cluster'") + + assert ch1.query("SHOW GRANTS FOR Alex") == "GRANT ALL ON *.* TO Alex\n" + assert ch2.query("SHOW GRANTS FOR Alex") == "GRANT ALL ON *.* TO Alex\n" + + ch1.query("DROP USER Alex ON CLUSTER 'cluster'") diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 3996a31e7c9..a930ddac7df 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -138,7 +138,7 @@ def test_backup_table_under_another_name(): assert instance.query("SELECT count(), sum(x) FROM test.table2") == "100\t4950\n" -def test_materialized_view(): +def test_materialized_view_select_1(): backup_name = new_backup_name() instance.query( "CREATE MATERIALIZED VIEW mv_1(x UInt8) ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT 1 AS x" @@ -456,18 +456,32 @@ def test_temporary_table(): ) == TSV([["e"], ["q"], ["w"]]) -# "BACKUP DATABASE _temporary_and_external_tables" is allowed but the backup must not contain these tables. -def test_temporary_tables_database(): +# The backup created by "BACKUP DATABASE _temporary_and_external_tables" must not contain tables from other sessions. 
+def test_temporary_database(): session_id = new_session_id() instance.http_query( "CREATE TEMPORARY TABLE temp_tbl(s String)", params={"session_id": session_id} ) - backup_name = new_backup_name() - instance.query(f"BACKUP DATABASE _temporary_and_external_tables TO {backup_name}") + other_session_id = new_session_id() + instance.http_query( + "CREATE TEMPORARY TABLE other_temp_tbl(s String)", + params={"session_id": other_session_id}, + ) - assert os.listdir(os.path.join(get_path_to_backup(backup_name), "metadata/")) == [ - "_temporary_and_external_tables.sql" # database metadata only + backup_name = new_backup_name() + instance.http_query( + f"BACKUP DATABASE _temporary_and_external_tables TO {backup_name}", + params={"session_id": session_id}, + ) + + assert os.listdir( + os.path.join(get_path_to_backup(backup_name), "temporary_tables/metadata") + ) == ["temp_tbl.sql"] + + assert sorted(os.listdir(get_path_to_backup(backup_name))) == [ + ".backup", + "temporary_tables", ] @@ -711,3 +725,107 @@ def test_system_users_async(): instance.query("SHOW CREATE USER u1") == "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_c = 3\n" ) + + +def test_projection(): + create_and_fill_table(n=3) + + instance.query("ALTER TABLE test.table ADD PROJECTION prjmax (SELECT MAX(x))") + instance.query(f"INSERT INTO test.table VALUES (100, 'a'), (101, 'b')") + + assert ( + instance.query( + "SELECT count() FROM system.projection_parts WHERE database='test' AND table='table' AND name='prjmax'" + ) + == "2\n" + ) + + backup_name = new_backup_name() + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + assert os.path.exists( + os.path.join( + get_path_to_backup(backup_name), "data/test/table/1_5_5_0/data.bin" + ) + ) + + assert os.path.exists( + os.path.join( + get_path_to_backup(backup_name), + "data/test/table/1_5_5_0/prjmax.proj/data.bin", + ) + ) + + instance.query("DROP TABLE test.table") + + assert ( + instance.query( + "SELECT count() FROM system.projection_parts WHERE database='test' AND table='table' AND name='prjmax'" + ) + == "0\n" + ) + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + + assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV( + [[0, "0"], [1, "1"], [2, "2"], [100, "a"], [101, "b"]] + ) + + assert ( + instance.query( + "SELECT count() FROM system.projection_parts WHERE database='test' AND table='table' AND name='prjmax'" + ) + == "2\n" + ) + + +def test_system_functions(): + instance.query("CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;") + + instance.query("CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');") + + backup_name = new_backup_name() + instance.query(f"BACKUP TABLE system.functions TO {backup_name}") + + instance.query("DROP FUNCTION linear_equation") + instance.query("DROP FUNCTION parity_str") + + instance.query(f"RESTORE TABLE system.functions FROM {backup_name}") + + assert instance.query( + "SELECT number, linear_equation(number, 2, 1) FROM numbers(3)" + ) == TSV([[0, 1], [1, 3], [2, 5]]) + + assert instance.query("SELECT number, parity_str(number) FROM numbers(3)") == TSV( + [[0, "even"], [1, "odd"], [2, "even"]] + ) + + +def test_backup_partition(): + create_and_fill_table(n=30) + + backup_name = new_backup_name() + instance.query(f"BACKUP TABLE test.table PARTITIONS '1', '4' TO {backup_name}") + + instance.query("DROP TABLE test.table") + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + + assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV( + [[1, "1"], [4, 
"4"], [11, "11"], [14, "14"], [21, "21"], [24, "24"]] + ) + + +def test_restore_partition(): + create_and_fill_table(n=30) + + backup_name = new_backup_name() + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + instance.query("DROP TABLE test.table") + + instance.query(f"RESTORE TABLE test.table PARTITIONS '2', '3' FROM {backup_name}") + + assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV( + [[2, "2"], [3, "3"], [12, "12"], [13, "13"], [22, "22"], [23, "23"]] + ) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 6264959fbce..02f855cf766 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -514,3 +514,141 @@ def test_system_users(): node1.query("SHOW CREATE USER u1") == "CREATE USER u1 SETTINGS custom_a = 123\n" ) assert node1.query("SHOW GRANTS FOR u1") == "GRANT SELECT ON default.tbl TO u1\n" + + +def test_projection(): + node1.query( + "CREATE TABLE tbl ON CLUSTER 'cluster' (x UInt32, y String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}') " + "ORDER BY y PARTITION BY x%10" + ) + node1.query(f"INSERT INTO tbl SELECT number, toString(number) FROM numbers(3)") + + node1.query("ALTER TABLE tbl ADD PROJECTION prjmax (SELECT MAX(x))") + node1.query(f"INSERT INTO tbl VALUES (100, 'a'), (101, 'b')") + + assert ( + node1.query( + "SELECT count() FROM system.projection_parts WHERE database='default' AND table='tbl' AND name='prjmax'" + ) + == "2\n" + ) + + backup_name = new_backup_name() + node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") + + node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + + assert ( + node1.query( + "SELECT count() FROM system.projection_parts WHERE database='default' AND table='tbl' AND name='prjmax'" + ) + == "0\n" + ) + + node1.query(f"RESTORE TABLE tbl FROM {backup_name}") + + assert node1.query("SELECT * FROM tbl ORDER BY x") == TSV( + [[0, "0"], [1, "1"], [2, "2"], [100, "a"], [101, "b"]] + ) + + assert ( + node1.query( + "SELECT count() FROM system.projection_parts WHERE database='default' AND table='tbl' AND name='prjmax'" + ) + == "2\n" + ) + + +def test_replicated_table_with_not_synced_def(): + node1.query( + "CREATE TABLE tbl (" + "x UInt8, y String" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}')" + "ORDER BY tuple()" + ) + + node2.query( + "CREATE TABLE tbl (" + "x UInt8, y String" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}')" + "ORDER BY tuple()" + ) + + node2.query("SYSTEM STOP REPLICATION QUEUES tbl") + node1.query("ALTER TABLE tbl MODIFY COLUMN x String") + + # Not synced because the replication queue is stopped + assert node1.query( + "SELECT name, type FROM system.columns WHERE database='default' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + assert node2.query( + "SELECT name, type FROM system.columns WHERE database='default' AND table='tbl'" + ) == TSV([["x", "UInt8"], ["y", "String"]]) + + backup_name = new_backup_name() + node2.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") + + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + + # But synced after RESTORE anyway + node1.query( + f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} SETTINGS replica_num_in_backup=1" + ) + assert node1.query( + "SELECT name, type FROM system.columns WHERE database='default' AND table='tbl'" + ) == TSV([["x", "String"], 
["y", "String"]]) + assert node2.query( + "SELECT name, type FROM system.columns WHERE database='default' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + + node2.query( + f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} SETTINGS replica_num_in_backup=2" + ) + assert node1.query( + "SELECT name, type FROM system.columns WHERE database='default' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + assert node2.query( + "SELECT name, type FROM system.columns WHERE database='default' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + + +def test_table_in_replicated_database_with_not_synced_def(): + node1.query( + "CREATE DATABASE mydb ON CLUSTER 'cluster' ENGINE=Replicated('/clickhouse/path/','{shard}','{replica}')" + ) + + node1.query( + "CREATE TABLE mydb.tbl (x UInt8, y String) ENGINE=ReplicatedMergeTree ORDER BY tuple()" + ) + + node1.query("ALTER TABLE mydb.tbl MODIFY COLUMN x String") + + backup_name = new_backup_name() + node2.query(f"BACKUP DATABASE mydb ON CLUSTER 'cluster' TO {backup_name}") + + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' NO DELAY") + + # But synced after RESTORE anyway + node1.query( + f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} SETTINGS replica_num_in_backup=1" + ) + assert node1.query( + "SELECT name, type FROM system.columns WHERE database='mydb' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + assert node2.query( + "SELECT name, type FROM system.columns WHERE database='mydb' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' NO DELAY") + + node2.query( + f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} SETTINGS replica_num_in_backup=2" + ) + assert node1.query( + "SELECT name, type FROM system.columns WHERE database='mydb' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + assert node2.query( + "SELECT name, type FROM system.columns WHERE database='mydb' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) diff --git a/tests/integration/test_dictionaries_postgresql/configs/dictionaries/postgres_dict.xml b/tests/integration/test_dictionaries_postgresql/configs/dictionaries/postgres_dict.xml index 8b69d74b67c..dc4e474f125 100644 --- a/tests/integration/test_dictionaries_postgresql/configs/dictionaries/postgres_dict.xml +++ b/tests/integration/test_dictionaries_postgresql/configs/dictionaries/postgres_dict.xml @@ -4,7 +4,7 @@ dict0 - clickhouse + postgres_database postgres1 5432 postgres @@ -38,7 +38,7 @@ dict1 - clickhouse + postgres_database postgres mysecretpassword test1
diff --git a/tests/integration/test_dictionaries_postgresql/configs/named_collections.xml b/tests/integration/test_dictionaries_postgresql/configs/named_collections.xml index 647840848fd..d08bc5b32c2 100644 --- a/tests/integration/test_dictionaries_postgresql/configs/named_collections.xml +++ b/tests/integration/test_dictionaries_postgresql/configs/named_collections.xml @@ -5,7 +5,7 @@ mysecretpassword postgres1 5432 - clickhouse + postgres_database test_table
@@ -13,7 +13,7 @@ mysecretpassword postgres1 5432 - clickhouse + postgres_database test_table
test_schema
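The named collection above and the rewritten helpers both target the renamed postgres_database; for reference, a minimal sketch of reaching it through the postgresql table function, mirroring create_and_fill_postgres_table below. Here node1 stands for the ClickHouse instance defined in the test module, the PostgreSQL table is assumed to exist already, and host, port and table name are placeholders:

# Placeholders; the tests substitute started_cluster.postgres_ip,
# started_cluster.postgres_port and the per-test table name.
host = "postgres1"
port = 5432
table_name = "test0"

table_func = (
    f"postgresql('{host}:{port}', 'postgres_database', "
    f"'{table_name}', 'postgres', 'mysecretpassword')"
)
node1.query(
    f"INSERT INTO TABLE FUNCTION {table_func} "
    "SELECT number, number, number FROM numbers(10000)"
)
assert node1.query(f"SELECT count() FROM {table_func}").rstrip() == "10000"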
diff --git a/tests/integration/test_dictionaries_postgresql/test.py b/tests/integration/test_dictionaries_postgresql/test.py index 49a75a09e4e..516ac27ea26 100644 --- a/tests/integration/test_dictionaries_postgresql/test.py +++ b/tests/integration/test_dictionaries_postgresql/test.py @@ -1,9 +1,11 @@ import pytest import time +import logging import psycopg2 from multiprocessing.dummy import Pool from helpers.cluster import ClickHouseCluster +from helpers.postgres_utility import get_postgres_conn from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) @@ -18,62 +20,40 @@ node1 = cluster.add_instance( with_postgres_cluster=True, ) -postgres_dict_table_template = """ - CREATE TABLE IF NOT EXISTS {} ( - id Integer NOT NULL, key Integer NOT NULL, value Integer NOT NULL, PRIMARY KEY (id)) - """ -click_dict_table_template = """ - CREATE TABLE IF NOT EXISTS `test`.`dict_table_{}` ( - `key` UInt32, `value` UInt32 - ) ENGINE = Dictionary({}) - """ - - -def get_postgres_conn(ip, port, database=False): - if database == True: - conn_string = "host={} port={} dbname='clickhouse' user='postgres' password='mysecretpassword'".format( - ip, port - ) - else: - conn_string = ( - "host={} port={} user='postgres' password='mysecretpassword'".format( - ip, port - ) - ) - - conn = psycopg2.connect(conn_string) - conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) - conn.autocommit = True - return conn - def create_postgres_db(conn, name): cursor = conn.cursor() - cursor.execute("CREATE DATABASE {}".format(name)) + cursor.execute(f"CREATE DATABASE {name}") def create_postgres_table(cursor, table_name): - cursor.execute(postgres_dict_table_template.format(table_name)) + cursor.execute( + f""" + CREATE TABLE IF NOT EXISTS {table_name} ( + id Integer NOT NULL, key Integer NOT NULL, value Integer NOT NULL, PRIMARY KEY (id)) + """ + ) def create_and_fill_postgres_table(cursor, table_name, port, host): create_postgres_table(cursor, table_name) # Fill postgres table using clickhouse postgres table function and check - table_func = """postgresql('{}:{}', 'clickhouse', '{}', 'postgres', 'mysecretpassword')""".format( - host, port, table_name - ) + table_func = f"""postgresql('{host}:{port}', 'postgres_database', '{table_name}', 'postgres', 'mysecretpassword')""" node1.query( - """INSERT INTO TABLE FUNCTION {} SELECT number, number, number from numbers(10000) - """.format( - table_func, table_name - ) + f"""INSERT INTO TABLE FUNCTION {table_func} SELECT number, number, number from numbers(10000)""" ) - result = node1.query("SELECT count() FROM {}".format(table_func)) + result = node1.query(f"SELECT count() FROM {table_func}") assert result.rstrip() == "10000" def create_dict(table_name, index=0): - node1.query(click_dict_table_template.format(table_name, "dict" + str(index))) + node1.query( + f""" + CREATE TABLE IF NOT EXISTS `test`.`dict_table_{table_name}` ( + `key` UInt32, `value` UInt32 + ) ENGINE = Dictionary(dict{str(index)}) + """ + ) @pytest.fixture(scope="module") @@ -85,14 +65,14 @@ def started_cluster(): postgres_conn = get_postgres_conn( ip=cluster.postgres_ip, port=cluster.postgres_port ) - print("postgres1 connected") - create_postgres_db(postgres_conn, "clickhouse") + logging.debug("postgres1 connected") + create_postgres_db(postgres_conn, "postgres_database") - postgres_conn = get_postgres_conn( + postgres2_conn = get_postgres_conn( ip=cluster.postgres2_ip, port=cluster.postgres_port ) - print("postgres2 connected") - create_postgres_db(postgres_conn, "clickhouse") + 
logging.debug("postgres2 connected") + create_postgres_db(postgres2_conn, "postgres_database") yield cluster @@ -117,27 +97,22 @@ def test_load_dictionaries(started_cluster): create_dict(table_name) dict_name = "dict0" - node1.query("SYSTEM RELOAD DICTIONARY {}".format(dict_name)) + node1.query(f"SYSTEM RELOAD DICTIONARY {dict_name}") assert ( - node1.query( - "SELECT count() FROM `test`.`dict_table_{}`".format(table_name) - ).rstrip() + node1.query(f"SELECT count() FROM `test`.`dict_table_{table_name}`").rstrip() == "10000" ) assert ( - node1.query("SELECT dictGetUInt32('{}', 'key', toUInt64(0))".format(dict_name)) - == "0\n" + node1.query(f"SELECT dictGetUInt32('{dict_name}', 'key', toUInt64(0))") == "0\n" ) assert ( - node1.query( - "SELECT dictGetUInt32('{}', 'value', toUInt64(9999))".format(dict_name) - ) + node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(9999))") == "9999\n" ) - cursor.execute("DROP TABLE IF EXISTS {}".format(table_name)) - node1.query("DROP TABLE IF EXISTS {}".format(table_name)) - node1.query("DROP DICTIONARY IF EXISTS {}".format(dict_name)) + cursor.execute(f"DROP TABLE IF EXISTS {table_name}") + node1.query(f"DROP TABLE IF EXISTS {table_name}") + node1.query(f"DROP DICTIONARY IF EXISTS {dict_name}") def test_postgres_dictionaries_custom_query_full_load(started_cluster): @@ -159,7 +134,7 @@ def test_postgres_dictionaries_custom_query_full_load(started_cluster): query = node1.query query( - """ + f""" CREATE DICTIONARY test_dictionary_custom_query ( id UInt64, @@ -169,16 +144,14 @@ def test_postgres_dictionaries_custom_query_full_load(started_cluster): PRIMARY KEY id LAYOUT(FLAT()) SOURCE(PostgreSQL( - DB 'clickhouse' - HOST '{}' - PORT {} + DB 'postgres_database' + HOST '{started_cluster.postgres_ip}' + PORT {started_cluster.postgres_port} USER 'postgres' PASSWORD 'mysecretpassword' QUERY $doc$SELECT id, value_1, value_2 FROM test_table_1 INNER JOIN test_table_2 USING (id);$doc$)) LIFETIME(0) - """.format( - started_cluster.postgres_ip, started_cluster.postgres_port - ) + """ ) result = query("SELECT id, value_1, value_2 FROM test_dictionary_custom_query") @@ -210,7 +183,7 @@ def test_postgres_dictionaries_custom_query_partial_load_simple_key(started_clus query = node1.query query( - """ + f""" CREATE DICTIONARY test_dictionary_custom_query ( id UInt64, @@ -220,15 +193,13 @@ def test_postgres_dictionaries_custom_query_partial_load_simple_key(started_clus PRIMARY KEY id LAYOUT(DIRECT()) SOURCE(PostgreSQL( - DB 'clickhouse' - HOST '{}' - PORT {} + DB 'postgres_database' + HOST '{started_cluster.postgres_ip}' + PORT {started_cluster.postgres_port} USER 'postgres' PASSWORD 'mysecretpassword' QUERY $doc$SELECT id, value_1, value_2 FROM test_table_1 INNER JOIN test_table_2 USING (id) WHERE {{condition}};$doc$)) - """.format( - started_cluster.postgres_ip, started_cluster.postgres_port - ) + """ ) result = query( @@ -262,7 +233,7 @@ def test_postgres_dictionaries_custom_query_partial_load_complex_key(started_clu query = node1.query query( - """ + f""" CREATE DICTIONARY test_dictionary_custom_query ( id UInt64, @@ -273,15 +244,13 @@ def test_postgres_dictionaries_custom_query_partial_load_complex_key(started_clu PRIMARY KEY id, key LAYOUT(COMPLEX_KEY_DIRECT()) SOURCE(PostgreSQL( - DB 'clickhouse' - HOST '{}' - PORT {} + DB 'postgres_database' + HOST '{started_cluster.postgres_ip}' + PORT {started_cluster.postgres_port} USER 'postgres' PASSWORD 'mysecretpassword' QUERY $doc$SELECT id, key, value_1, value_2 FROM test_table_1 INNER JOIN test_table_2 USING 
(id, key) WHERE {{condition}};$doc$)) - """.format( - started_cluster.postgres_ip, started_cluster.postgres_port - ) + """ ) result = query( @@ -314,70 +283,56 @@ def test_invalidate_query(started_cluster): # invalidate query: SELECT value FROM test0 WHERE id = 0 dict_name = "dict0" create_dict(table_name) - node1.query("SYSTEM RELOAD DICTIONARY {}".format(dict_name)) + node1.query(f"SYSTEM RELOAD DICTIONARY {dict_name}") assert ( - node1.query( - "SELECT dictGetUInt32('{}', 'value', toUInt64(0))".format(dict_name) - ) + node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))") == "0\n" ) assert ( - node1.query( - "SELECT dictGetUInt32('{}', 'value', toUInt64(1))".format(dict_name) - ) + node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(1))") == "1\n" ) # update should happen - cursor.execute("UPDATE {} SET value=value+1 WHERE id = 0".format(table_name)) + cursor.execute(f"UPDATE {table_name} SET value=value+1 WHERE id = 0") while True: result = node1.query( - "SELECT dictGetUInt32('{}', 'value', toUInt64(0))".format(dict_name) + f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))" ) if result != "0\n": break assert ( - node1.query( - "SELECT dictGetUInt32('{}', 'value', toUInt64(0))".format(dict_name) - ) + node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))") == "1\n" ) # no update should happen - cursor.execute("UPDATE {} SET value=value*2 WHERE id != 0".format(table_name)) + cursor.execute(f"UPDATE {table_name} SET value=value*2 WHERE id != 0") time.sleep(5) assert ( - node1.query( - "SELECT dictGetUInt32('{}', 'value', toUInt64(0))".format(dict_name) - ) + node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))") == "1\n" ) assert ( - node1.query( - "SELECT dictGetUInt32('{}', 'value', toUInt64(1))".format(dict_name) - ) + node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(1))") == "1\n" ) # update should happen - cursor.execute("UPDATE {} SET value=value+1 WHERE id = 0".format(table_name)) + cursor.execute(f"UPDATE {table_name} SET value=value+1 WHERE id = 0") time.sleep(5) assert ( - node1.query( - "SELECT dictGetUInt32('{}', 'value', toUInt64(0))".format(dict_name) - ) + node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(0))") == "2\n" ) assert ( - node1.query( - "SELECT dictGetUInt32('{}', 'value', toUInt64(1))".format(dict_name) - ) + node1.query(f"SELECT dictGetUInt32('{dict_name}', 'value', toUInt64(1))") == "2\n" ) - node1.query("DROP TABLE IF EXISTS {}".format(table_name)) - node1.query("DROP DICTIONARY IF EXISTS {}".format(dict_name)) - cursor.execute("DROP TABLE IF EXISTS {}".format(table_name)) + node1.query(f"DROP TABLE IF EXISTS {table_name}") + node1.query(f"DROP DICTIONARY IF EXISTS {dict_name}") + cursor.execute(f"DROP TABLE IF EXISTS {table_name}") def test_dictionary_with_replicas(started_cluster): @@ -446,7 +401,7 @@ def test_postgres_schema(started_cluster): host 'postgres1' user 'postgres' password 'mysecretpassword' - db 'clickhouse' + db 'postgres_database' table 'test_schema.test_table')) LIFETIME(MIN 1 MAX 2) LAYOUT(HASHED()); @@ -458,6 +413,8 @@ def test_postgres_schema(started_cluster): result = node1.query("SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(99))") assert int(result.strip()) == 99 node1.query("DROP DICTIONARY IF EXISTS postgres_dict") + cursor.execute("DROP TABLE test_schema.test_table") + cursor.execute("DROP SCHEMA test_schema") def test_predefined_connection_configuration(started_cluster): @@ -566,7 +523,7 @@ def 
test_bad_configuration(started_cluster): host 'postgres1' user 'postgres' password 'mysecretpassword' - dbbb 'clickhouse' + dbbb 'postgres_database' table 'test_schema.test_table')) LIFETIME(MIN 1 MAX 2) LAYOUT(HASHED()); diff --git a/tests/integration/test_dictionaries_redis/test_long.py b/tests/integration/test_dictionaries_redis/test_long.py index 094df789704..19b03322b4d 100644 --- a/tests/integration/test_dictionaries_redis/test_long.py +++ b/tests/integration/test_dictionaries_redis/test_long.py @@ -2,7 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster import redis -cluster = ClickHouseCluster(__file__) +cluster = ClickHouseCluster(__file__, name="long") node = cluster.add_instance("node", with_redis=True) diff --git a/tests/integration/test_keeper_mntr_pressure/__init__.py b/tests/integration/test_keeper_mntr_pressure/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_mntr_pressure/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_mntr_pressure/config/enable_keeper1.xml b/tests/integration/test_keeper_mntr_pressure/config/enable_keeper1.xml new file mode 100644 index 00000000000..17455ed12f5 --- /dev/null +++ b/tests/integration/test_keeper_mntr_pressure/config/enable_keeper1.xml @@ -0,0 +1,41 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + + 1 + node1 + 9234 + true + 3 + + + 2 + node2 + 9234 + true + true + 2 + + + 3 + node3 + 9234 + true + true + 1 + + + + diff --git a/tests/integration/test_keeper_mntr_pressure/config/enable_keeper2.xml b/tests/integration/test_keeper_mntr_pressure/config/enable_keeper2.xml new file mode 100644 index 00000000000..03a23984cc2 --- /dev/null +++ b/tests/integration/test_keeper_mntr_pressure/config/enable_keeper2.xml @@ -0,0 +1,41 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + + 1 + node1 + 9234 + true + 3 + + + 2 + node2 + 9234 + true + true + 2 + + + 3 + node3 + 9234 + true + true + 1 + + + + diff --git a/tests/integration/test_keeper_mntr_pressure/config/enable_keeper3.xml b/tests/integration/test_keeper_mntr_pressure/config/enable_keeper3.xml new file mode 100644 index 00000000000..a69cabf8c54 --- /dev/null +++ b/tests/integration/test_keeper_mntr_pressure/config/enable_keeper3.xml @@ -0,0 +1,40 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + true + 3 + + + 2 + node2 + 9234 + true + true + 2 + + + 3 + node3 + 9234 + true + true + 1 + + + + diff --git a/tests/integration/test_keeper_mntr_pressure/test.py b/tests/integration/test_keeper_mntr_pressure/test.py new file mode 100644 index 00000000000..471767210d6 --- /dev/null +++ b/tests/integration/test_keeper_mntr_pressure/test.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 + +from helpers.cluster import ClickHouseCluster +import pytest +import random +import string +import os +import time +from io import StringIO +import socket +import threading + +from helpers.network import PartitionManager + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", main_configs=["config/enable_keeper1.xml"], stay_alive=True +) +node2 = cluster.add_instance( + "node2", main_configs=["config/enable_keeper2.xml"], stay_alive=True +) +node3 = cluster.add_instance( + "node3", 
main_configs=["config/enable_keeper3.xml"], stay_alive=True +) + +NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def get_keeper_socket(node_name): + hosts = cluster.get_instance_ip(node_name) + client = socket.socket() + client.settimeout(10) + client.connect((hosts, 9181)) + return client + + +def close_keeper_socket(cli): + if cli is not None: + cli.close() + + +def send_4lw_cmd(node_name, cmd="ruok"): + client = None + try: + client = get_keeper_socket(node_name) + client.send(cmd.encode()) + data = client.recv(100_000) + data = data.decode() + return data + finally: + if client is not None: + client.close() + + +def test_aggressive_mntr(started_cluster): + def go_mntr(node_name): + for _ in range(100000): + print(node_name, send_4lw_cmd(node_name, "mntr")) + + node1_thread = threading.Thread(target=lambda: go_mntr(node1.name)) + node2_thread = threading.Thread(target=lambda: go_mntr(node2.name)) + node3_thread = threading.Thread(target=lambda: go_mntr(node3.name)) + node1_thread.start() + node2_thread.start() + node3_thread.start() + + node2.stop_clickhouse() + node3.stop_clickhouse() + + while send_4lw_cmd(node1.name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.2) + + node1.stop_clickhouse() + starters = [] + for node in [node1, node2, node3]: + start_thread = threading.Thread(target=lambda: node.start_clickhouse()) + start_thread.start() + starters.append(start_thread) + + for start_thread in starters: + start_thread.join() + + node1_thread.join() + node2_thread.join() + node3_thread.join() + + for node in [node1, node2, node3]: + assert not node.contains_in_log("LOGICAL_ERROR") diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index aabf3507d8f..5619c551c71 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -3,6 +3,7 @@ import psycopg2 from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry +from helpers.postgres_utility import get_postgres_conn from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) @@ -20,17 +21,6 @@ postgres_drop_table_template = """ """ -def get_postgres_conn(cluster, database=False): - if database == True: - conn_string = f"host={cluster.postgres_ip} port={cluster.postgres_port} dbname='test_database' user='postgres' password='mysecretpassword'" - else: - conn_string = f"host={cluster.postgres_ip} port={cluster.postgres_port} user='postgres' password='mysecretpassword'" - conn = psycopg2.connect(conn_string) - conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) - conn.autocommit = True - return conn - - def create_postgres_db(cursor, name): cursor.execute("CREATE DATABASE {}".format(name)) @@ -49,9 +39,9 @@ def drop_postgres_table(cursor, table_name): def started_cluster(): try: cluster.start() - conn = get_postgres_conn(cluster) + conn = get_postgres_conn(cluster.postgres_ip, cluster.postgres_port) cursor = conn.cursor() - create_postgres_db(cursor, "test_database") + create_postgres_db(cursor, "postgres_database") yield cluster finally: @@ -60,93 +50,104 @@ def started_cluster(): def test_postgres_database_engine_with_postgres_ddl(started_cluster): # connect to database as well - conn = get_postgres_conn(started_cluster, 
True) + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=True + ) cursor = conn.cursor() node1.query( - "CREATE DATABASE test_database ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword')" + "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')" ) - assert "test_database" in node1.query("SHOW DATABASES") + assert "postgres_database" in node1.query("SHOW DATABASES") create_postgres_table(cursor, "test_table") - assert "test_table" in node1.query("SHOW TABLES FROM test_database") + assert "test_table" in node1.query("SHOW TABLES FROM postgres_database") cursor.execute("ALTER TABLE test_table ADD COLUMN data Text") assert "data" in node1.query( - "SELECT name FROM system.columns WHERE table = 'test_table' AND database = 'test_database'" + "SELECT name FROM system.columns WHERE table = 'test_table' AND database = 'postgres_database'" ) cursor.execute("ALTER TABLE test_table DROP COLUMN data") assert "data" not in node1.query( - "SELECT name FROM system.columns WHERE table = 'test_table' AND database = 'test_database'" + "SELECT name FROM system.columns WHERE table = 'test_table' AND database = 'postgres_database'" ) - node1.query("DROP DATABASE test_database") - assert "test_database" not in node1.query("SHOW DATABASES") + node1.query("DROP DATABASE postgres_database") + assert "postgres_database" not in node1.query("SHOW DATABASES") drop_postgres_table(cursor, "test_table") def test_postgresql_database_engine_with_clickhouse_ddl(started_cluster): - conn = get_postgres_conn(started_cluster, True) + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=True + ) cursor = conn.cursor() node1.query( - "CREATE DATABASE test_database ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword')" + "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')" ) create_postgres_table(cursor, "test_table") - assert "test_table" in node1.query("SHOW TABLES FROM test_database") + assert "test_table" in node1.query("SHOW TABLES FROM postgres_database") - node1.query("DROP TABLE test_database.test_table") - assert "test_table" not in node1.query("SHOW TABLES FROM test_database") + node1.query("DROP TABLE postgres_database.test_table") + assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database") - node1.query("ATTACH TABLE test_database.test_table") - assert "test_table" in node1.query("SHOW TABLES FROM test_database") + node1.query("ATTACH TABLE postgres_database.test_table") + assert "test_table" in node1.query("SHOW TABLES FROM postgres_database") - node1.query("DETACH TABLE test_database.test_table") - assert "test_table" not in node1.query("SHOW TABLES FROM test_database") + node1.query("DETACH TABLE postgres_database.test_table") + assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database") - node1.query("ATTACH TABLE test_database.test_table") - assert "test_table" in node1.query("SHOW TABLES FROM test_database") + node1.query("ATTACH TABLE postgres_database.test_table") + assert "test_table" in node1.query("SHOW TABLES FROM postgres_database") - node1.query("DROP DATABASE test_database") - assert "test_database" not in node1.query("SHOW DATABASES") + node1.query("DROP DATABASE postgres_database") + assert "postgres_database" not in node1.query("SHOW DATABASES") 
drop_postgres_table(cursor, "test_table") def test_postgresql_database_engine_queries(started_cluster): - conn = get_postgres_conn(started_cluster, True) + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=True + ) cursor = conn.cursor() node1.query( - "CREATE DATABASE test_database ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword')" + "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')" ) create_postgres_table(cursor, "test_table") - assert node1.query("SELECT count() FROM test_database.test_table").rstrip() == "0" + assert ( + node1.query("SELECT count() FROM postgres_database.test_table").rstrip() == "0" + ) node1.query( - "INSERT INTO test_database.test_table SELECT number, number from numbers(10000)" + "INSERT INTO postgres_database.test_table SELECT number, number from numbers(10000)" ) assert ( - node1.query("SELECT count() FROM test_database.test_table").rstrip() == "10000" + node1.query("SELECT count() FROM postgres_database.test_table").rstrip() + == "10000" ) drop_postgres_table(cursor, "test_table") - assert "test_table" not in node1.query("SHOW TABLES FROM test_database") + assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database") - node1.query("DROP DATABASE test_database") - assert "test_database" not in node1.query("SHOW DATABASES") + node1.query("DROP DATABASE postgres_database") + assert "postgres_database" not in node1.query("SHOW DATABASES") def test_get_create_table_query_with_multidim_arrays(started_cluster): - conn = get_postgres_conn(started_cluster, True) + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=True + ) cursor = conn.cursor() node1.query( - "CREATE DATABASE test_database ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword')" + "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')" ) cursor.execute( @@ -157,11 +158,11 @@ def test_get_create_table_query_with_multidim_arrays(started_cluster): )""" ) - node1.query("DETACH TABLE test_database.array_columns") - node1.query("ATTACH TABLE test_database.array_columns") + node1.query("DETACH TABLE postgres_database.array_columns") + node1.query("ATTACH TABLE postgres_database.array_columns") node1.query( - "INSERT INTO test_database.array_columns " + "INSERT INTO postgres_database.array_columns " "VALUES (" "[[[1, 1], [1, 1]], [[3, 3], [3, 3]], [[4, 4], [5, 5]]], " "[[[1, NULL], [NULL, 1]], [[NULL, NULL], [NULL, NULL]], [[4, 4], [5, 5]]] " @@ -169,7 +170,7 @@ def test_get_create_table_query_with_multidim_arrays(started_cluster): ) result = node1.query( """ - SELECT * FROM test_database.array_columns""" + SELECT * FROM postgres_database.array_columns""" ) expected = ( "[[[1,1],[1,1]],[[3,3],[3,3]],[[4,4],[5,5]]]\t" @@ -177,64 +178,69 @@ def test_get_create_table_query_with_multidim_arrays(started_cluster): ) assert result == expected - node1.query("DROP DATABASE test_database") - assert "test_database" not in node1.query("SHOW DATABASES") + node1.query("DROP DATABASE postgres_database") + assert "postgres_database" not in node1.query("SHOW DATABASES") drop_postgres_table(cursor, "array_columns") def test_postgresql_database_engine_table_cache(started_cluster): - conn = get_postgres_conn(started_cluster, True) + conn = get_postgres_conn( + started_cluster.postgres_ip, 
started_cluster.postgres_port, database=True + ) cursor = conn.cursor() node1.query( - "CREATE DATABASE test_database ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword', '', 1)" + "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', '', 1)" ) create_postgres_table(cursor, "test_table") assert ( - node1.query("DESCRIBE TABLE test_database.test_table").rstrip() + node1.query("DESCRIBE TABLE postgres_database.test_table").rstrip() == "id\tInt32\t\t\t\t\t\nvalue\tNullable(Int32)" ) cursor.execute("ALTER TABLE test_table ADD COLUMN data Text") assert ( - node1.query("DESCRIBE TABLE test_database.test_table").rstrip() + node1.query("DESCRIBE TABLE postgres_database.test_table").rstrip() == "id\tInt32\t\t\t\t\t\nvalue\tNullable(Int32)" ) - node1.query("DETACH TABLE test_database.test_table") - assert "test_table" not in node1.query("SHOW TABLES FROM test_database") + node1.query("DETACH TABLE postgres_database.test_table") + assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database") - node1.query("ATTACH TABLE test_database.test_table") - assert "test_table" in node1.query("SHOW TABLES FROM test_database") + node1.query("ATTACH TABLE postgres_database.test_table") + assert "test_table" in node1.query("SHOW TABLES FROM postgres_database") assert ( - node1.query("DESCRIBE TABLE test_database.test_table").rstrip() + node1.query("DESCRIBE TABLE postgres_database.test_table").rstrip() == "id\tInt32\t\t\t\t\t\nvalue\tNullable(Int32)\t\t\t\t\t\ndata\tNullable(String)" ) - node1.query("DROP TABLE test_database.test_table") - assert "test_table" not in node1.query("SHOW TABLES FROM test_database") + node1.query("DROP TABLE postgres_database.test_table") + assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database") - node1.query("ATTACH TABLE test_database.test_table") - assert "test_table" in node1.query("SHOW TABLES FROM test_database") + node1.query("ATTACH TABLE postgres_database.test_table") + assert "test_table" in node1.query("SHOW TABLES FROM postgres_database") node1.query( - "INSERT INTO test_database.test_table SELECT number, number, toString(number) from numbers(10000)" + "INSERT INTO postgres_database.test_table SELECT number, number, toString(number) from numbers(10000)" ) assert ( - node1.query("SELECT count() FROM test_database.test_table").rstrip() == "10000" + node1.query("SELECT count() FROM postgres_database.test_table").rstrip() + == "10000" ) cursor.execute("DROP TABLE test_table;") - assert "test_table" not in node1.query("SHOW TABLES FROM test_database") + assert "test_table" not in node1.query("SHOW TABLES FROM postgres_database") - node1.query("DROP DATABASE test_database") - assert "test_database" not in node1.query("SHOW DATABASES") + node1.query("DROP DATABASE postgres_database") + assert "postgres_database" not in node1.query("SHOW DATABASES") def test_postgresql_database_with_schema(started_cluster): - conn = get_postgres_conn(started_cluster, True) + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=True + ) cursor = conn.cursor() cursor.execute("CREATE SCHEMA test_schema") @@ -243,17 +249,23 @@ def test_postgresql_database_with_schema(started_cluster): cursor.execute("CREATE TABLE table3 (a integer)") node1.query( - "CREATE DATABASE test_database ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword', 'test_schema')" + "CREATE DATABASE postgres_database 
ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', 'test_schema')" ) - assert node1.query("SHOW TABLES FROM test_database") == "table1\ntable2\n" + assert node1.query("SHOW TABLES FROM postgres_database") == "table1\ntable2\n" - node1.query("INSERT INTO test_database.table1 SELECT number from numbers(10000)") - assert node1.query("SELECT count() FROM test_database.table1").rstrip() == "10000" - node1.query("DETACH TABLE test_database.table1") - node1.query("ATTACH TABLE test_database.table1") - assert node1.query("SELECT count() FROM test_database.table1").rstrip() == "10000" - node1.query("DROP DATABASE test_database") + node1.query( + "INSERT INTO postgres_database.table1 SELECT number from numbers(10000)" + ) + assert ( + node1.query("SELECT count() FROM postgres_database.table1").rstrip() == "10000" + ) + node1.query("DETACH TABLE postgres_database.table1") + node1.query("ATTACH TABLE postgres_database.table1") + assert ( + node1.query("SELECT count() FROM postgres_database.table1").rstrip() == "10000" + ) + node1.query("DROP DATABASE postgres_database") cursor.execute("DROP SCHEMA test_schema CASCADE") cursor.execute("DROP TABLE table3") @@ -321,17 +333,20 @@ def test_predefined_connection_configuration(started_cluster): def test_postgres_database_old_syntax(started_cluster): - conn = get_postgres_conn(started_cluster, True) + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=True + ) cursor = conn.cursor() node1.query( """ - DROP DATABASE IF EXISTS test_database; - CREATE DATABASE test_database ENGINE = PostgreSQL('postgres1:5432', 'test_database', 'postgres', 'mysecretpassword', 1); + CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', 1); """ ) create_postgres_table(cursor, "test_table") - assert "test_table" in node1.query("SHOW TABLES FROM test_database") + assert "test_table" in node1.query("SHOW TABLES FROM postgres_database") + cursor.execute(f"DROP TABLE test_table") + node1.query("DROP DATABASE IF EXISTS postgres_database;") if __name__ == "__main__": diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index b5354c0e01b..39be0d564df 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -110,7 +110,7 @@ def test_s3_zero_copy_replication(cluster, policy): ) node1.query("INSERT INTO s3_test VALUES (0,'data'),(1,'data')") - node2.query("SYSTEM SYNC REPLICA s3_test") + node2.query("SYSTEM SYNC REPLICA s3_test", timeout=30) assert ( node1.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data')" @@ -124,7 +124,7 @@ def test_s3_zero_copy_replication(cluster, policy): assert get_large_objects_count(cluster) == 1 node2.query("INSERT INTO s3_test VALUES (2,'data'),(3,'data')") - node1.query("SYSTEM SYNC REPLICA s3_test") + node1.query("SYSTEM SYNC REPLICA s3_test", timeout=30) assert ( node2.query("SELECT * FROM s3_test order by id FORMAT Values") @@ -166,7 +166,7 @@ def test_s3_zero_copy_on_hybrid_storage(cluster): ) node1.query("INSERT INTO hybrid_test VALUES (0,'data'),(1,'data')") - node2.query("SYSTEM SYNC REPLICA hybrid_test") + node2.query("SYSTEM SYNC REPLICA hybrid_test", timeout=30) assert ( node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") @@ -292,7 +292,7 @@ def test_s3_zero_copy_with_ttl_move(cluster, storage_policy, 
large_data, iterati node1.query("INSERT INTO ttl_move_test VALUES (11, now() - INTERVAL 1 DAY)") node1.query("OPTIMIZE TABLE ttl_move_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_move_test") + node2.query("SYSTEM SYNC REPLICA ttl_move_test", timeout=30) if large_data: assert ( @@ -362,8 +362,8 @@ def test_s3_zero_copy_with_ttl_delete(cluster, large_data, iterations): node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") - node1.query("SYSTEM SYNC REPLICA ttl_delete_test") - node2.query("SYSTEM SYNC REPLICA ttl_delete_test") + node1.query("SYSTEM SYNC REPLICA ttl_delete_test", timeout=30) + node2.query("SYSTEM SYNC REPLICA ttl_delete_test", timeout=30) if large_data: assert ( @@ -445,7 +445,7 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): check_objects_exisis(cluster, objects01) node1.query("TRUNCATE TABLE unfreeze_test") - node2.query("SYSTEM SYNC REPLICA unfreeze_test") + node2.query("SYSTEM SYNC REPLICA unfreeze_test", timeout=30) objects11 = node1.get_backuped_s3_objects("s31", "freeze_backup1") objects12 = node2.get_backuped_s3_objects("s31", "freeze_backup2") @@ -499,7 +499,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): node1.query("ALTER TABLE drop_detached_test FREEZE WITH NAME 'detach_backup1'") node1.query("INSERT INTO drop_detached_test VALUES (1)") node1.query("ALTER TABLE drop_detached_test FREEZE WITH NAME 'detach_backup2'") - node2.query("SYSTEM SYNC REPLICA drop_detached_test") + node2.query("SYSTEM SYNC REPLICA drop_detached_test", timeout=30) objects1 = node1.get_backuped_s3_objects("s31", "detach_backup1") objects2 = node1.get_backuped_s3_objects("s31", "detach_backup2") @@ -511,7 +511,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): node1.query("ALTER TABLE drop_detached_test DETACH PARTITION '0'") node1.query("ALTER TABLE drop_detached_test DETACH PARTITION '1'") - node2.query("SYSTEM SYNC REPLICA drop_detached_test") + node2.query("SYSTEM SYNC REPLICA drop_detached_test", timeout=30) wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) @@ -523,7 +523,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '1'", settings={"allow_drop_detached": 1}, ) - node1.query("SYSTEM SYNC REPLICA drop_detached_test") + node1.query("SYSTEM SYNC REPLICA drop_detached_test", timeout=30) wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) @@ -534,7 +534,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '1'", settings={"allow_drop_detached": 1}, ) - node2.query("SYSTEM SYNC REPLICA drop_detached_test") + node2.query("SYSTEM SYNC REPLICA drop_detached_test", timeout=30) wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) @@ -545,7 +545,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '0'", settings={"allow_drop_detached": 1}, ) - node2.query("SYSTEM SYNC REPLICA drop_detached_test") + node2.query("SYSTEM SYNC REPLICA drop_detached_test", timeout=30) wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) @@ -555,7 +555,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '0'", settings={"allow_drop_detached": 1}, ) - node1.query("SYSTEM SYNC REPLICA 
drop_detached_test") + node1.query("SYSTEM SYNC REPLICA drop_detached_test", timeout=30) wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) diff --git a/tests/integration/test_storage_nats/__init__.py b/tests/integration/test_storage_nats/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto b/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto new file mode 100755 index 00000000000..090ed917cdd --- /dev/null +++ b/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + + message ProtoKeyValue { + uint64 key = 1; + string value = 2; + } diff --git a/tests/integration/test_storage_nats/configs/macros.xml b/tests/integration/test_storage_nats/configs/macros.xml new file mode 100644 index 00000000000..4aa547e049e --- /dev/null +++ b/tests/integration/test_storage_nats/configs/macros.xml @@ -0,0 +1,7 @@ + + + nats1:4444 + macro + JSONEachRow + + diff --git a/tests/integration/test_storage_nats/configs/named_collection.xml b/tests/integration/test_storage_nats/configs/named_collection.xml new file mode 100644 index 00000000000..15817f321f0 --- /dev/null +++ b/tests/integration/test_storage_nats/configs/named_collection.xml @@ -0,0 +1,13 @@ + + + + nats1:4444 + named + JSONEachRow + 111 + 12 + click + house + + + diff --git a/tests/integration/test_storage_nats/configs/nats.xml b/tests/integration/test_storage_nats/configs/nats.xml new file mode 100644 index 00000000000..0a8be9122ad --- /dev/null +++ b/tests/integration/test_storage_nats/configs/nats.xml @@ -0,0 +1,6 @@ + + + click + house + + diff --git a/tests/integration/test_storage_nats/configs/users.xml b/tests/integration/test_storage_nats/configs/users.xml new file mode 100644 index 00000000000..797dfebba0e --- /dev/null +++ b/tests/integration/test_storage_nats/configs/users.xml @@ -0,0 +1,8 @@ + + + + + 1 + + + diff --git a/tests/integration/test_storage_nats/nats_pb2.py b/tests/integration/test_storage_nats/nats_pb2.py new file mode 100644 index 00000000000..4330ff57950 --- /dev/null +++ b/tests/integration/test_storage_nats/nats_pb2.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: clickhouse_path/format_schemas/nats.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n)clickhouse_path/format_schemas/nats.proto"+\n\rProtoKeyValue\x12\x0b\n\x03key\x18\x01 \x01(\x04\x12\r\n\x05value\x18\x02 \x01(\tb\x06proto3' +) + + +_PROTOKEYVALUE = DESCRIPTOR.message_types_by_name["ProtoKeyValue"] +ProtoKeyValue = _reflection.GeneratedProtocolMessageType( + "ProtoKeyValue", + (_message.Message,), + { + "DESCRIPTOR": _PROTOKEYVALUE, + "__module__": "clickhouse_path.format_schemas.nats_pb2" + # @@protoc_insertion_point(class_scope:ProtoKeyValue) + }, +) +_sym_db.RegisterMessage(ProtoKeyValue) + +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _PROTOKEYVALUE._serialized_start = 45 + _PROTOKEYVALUE._serialized_end = 88 +# @@protoc_insertion_point(module_scope) diff --git a/tests/integration/test_storage_nats/test.py b/tests/integration/test_storage_nats/test.py new file mode 100644 index 00000000000..a952f4b78a6 --- /dev/null +++ b/tests/integration/test_storage_nats/test.py @@ -0,0 +1,1494 @@ +import json +import os.path as p +import random +import subprocess +import threading +import logging +import time +from random import randrange +import math + +import asyncio +import nats +import pytest +from google.protobuf.internal.encoder import _VarintBytes +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster, check_nats_is_available +from helpers.test_tools import TSV + +from . 
import nats_pb2 + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=[ + "configs/nats.xml", + "configs/macros.xml", + "configs/named_collection.xml", + ], + user_configs=["configs/users.xml"], + with_nats=True, + clickhouse_path_dir="clickhouse_path", +) + + +# Helpers + + +def wait_nats_to_start(nats_ip, timeout=180): + start = time.time() + while time.time() - start < timeout: + try: + if asyncio.run(check_nats_is_available(nats_ip)): + logging.debug("NATS is available") + return + time.sleep(0.5) + except Exception as ex: + logging.debug("Can't connect to NATS " + str(ex)) + time.sleep(0.5) + + +def nats_check_result(result, check=False, ref_file="test_nats_json.reference"): + fpath = p.join(p.dirname(__file__), ref_file) + with open(fpath) as reference: + if check: + assert TSV(result) == TSV(reference) + else: + return TSV(result) == TSV(reference) + + +def kill_nats(nats_id): + p = subprocess.Popen(("docker", "stop", nats_id), stdout=subprocess.PIPE) + p.communicate() + return p.returncode == 0 + + +def revive_nats(nats_id, nats_ip): + p = subprocess.Popen(("docker", "start", nats_id), stdout=subprocess.PIPE) + p.communicate() + wait_nats_to_start(nats_ip) + + +# Fixtures + + +@pytest.fixture(scope="module") +def nats_cluster(): + try: + cluster.start() + logging.debug("nats_id is {}".format(instance.cluster.nats_docker_id)) + instance.query("CREATE DATABASE test") + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def nats_setup_teardown(): + print("NATS is available - running test") + yield # run test + instance.query("DROP DATABASE test NO DELAY") + instance.query("CREATE DATABASE test") + + +# Tests + + +async def nats_produce_messages(ip, subject, messages=(), bytes=None): + nc = await nats.connect("{}:4444".format(ip), user="click", password="house") + logging.debug("NATS connection status: " + str(nc.is_connected)) + + for message in messages: + await nc.publish(subject, message.encode()) + if bytes is not None: + await nc.publish(subject, bytes) + logging.debug("Finished publishing to " + subject) + + await nc.close() + return messages + + +def check_table_is_ready(instance, table_name): + try: + instance.query("SELECT * FROM {}".format(table_name)) + return True + except Exception: + return False + + +def test_nats_select(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'select', + nats_format = 'JSONEachRow', + nats_row_delimiter = '\\n'; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "select", messages)) + + # The order of messages in select * from test.nats is not guaranteed, so sleep to collect everything in one select + time.sleep(1) + + result = "" + while True: + result += instance.query( + "SELECT * FROM test.nats ORDER BY key", ignore_error=True + ) + if nats_check_result(result): + break + + nats_check_result(result, True) + + +def test_nats_select_empty(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'empty', + nats_format = 'TSV', + nats_row_delimiter = '\\n'; + """ + ) + + assert int(instance.query("SELECT count() 
FROM test.nats")) == 0 + + +def test_nats_json_without_delimiter(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'json', + nats_format = 'JSONEachRow'; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + messages = "" + for i in range(25): + messages += json.dumps({"key": i, "value": i}) + "\n" + + all_messages = [messages] + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "json", all_messages)) + + messages = "" + for i in range(25, 50): + messages += json.dumps({"key": i, "value": i}) + "\n" + all_messages = [messages] + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "json", all_messages)) + + time.sleep(1) + + result = "" + time_limit_sec = 60 + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result += instance.query( + "SELECT * FROM test.nats ORDER BY key", ignore_error=True + ) + if nats_check_result(result): + break + + nats_check_result(result, True) + + +def test_nats_csv_with_delimiter(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'csv', + nats_format = 'CSV', + nats_row_delimiter = '\\n'; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + messages = [] + for i in range(50): + messages.append("{i}, {i}".format(i=i)) + + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "csv", messages)) + + time.sleep(1) + + result = "" + for _ in range(60): + result += instance.query( + "SELECT * FROM test.nats ORDER BY key", ignore_error=True + ) + if nats_check_result(result): + break + + nats_check_result(result, True) + + +def test_nats_tsv_with_delimiter(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'tsv', + nats_format = 'TSV', + nats_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.nats; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + messages = [] + for i in range(50): + messages.append("{i}\t{i}".format(i=i)) + + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "tsv", messages)) + + result = "" + for _ in range(60): + result = instance.query("SELECT * FROM test.view ORDER BY key") + if nats_check_result(result): + break + + nats_check_result(result, True) + + +# + + +def test_nats_macros(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = '{nats_url}', + nats_subjects = '{nats_subjects}', + nats_format = '{nats_format}' + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + message = "" + for i in range(50): + message += json.dumps({"key": i, "value": i}) + "\n" + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "macro", [message])) + + time.sleep(1) + + result = "" + for _ in range(60): + result += instance.query( + "SELECT * FROM test.nats ORDER BY key", ignore_error=True + ) + if nats_check_result(result): 
+ break + + nats_check_result(result, True) + + +def test_nats_materialized_view(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'mv', + nats_format = 'JSONEachRow', + nats_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.nats; + + CREATE TABLE test.view2 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS + SELECT * FROM test.nats group by (key, value); + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "mv", messages)) + + time_limit_sec = 60 + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result = instance.query("SELECT * FROM test.view ORDER BY key") + if nats_check_result(result): + break + + nats_check_result(result, True) + + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result = instance.query("SELECT * FROM test.view2 ORDER BY key") + if nats_check_result(result): + break + + nats_check_result(result, True) + + +def test_nats_materialized_view_with_subquery(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'mvsq', + nats_format = 'JSONEachRow', + nats_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM (SELECT * FROM test.nats); + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "mvsq", messages)) + + time_limit_sec = 60 + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result = instance.query("SELECT * FROM test.view ORDER BY key") + if nats_check_result(result): + break + + nats_check_result(result, True) + + +def test_nats_many_materialized_views(nats_cluster): + instance.query( + """ + DROP TABLE IF EXISTS test.view1; + DROP TABLE IF EXISTS test.view2; + DROP TABLE IF EXISTS test.consumer1; + DROP TABLE IF EXISTS test.consumer2; + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'mmv', + nats_format = 'JSONEachRow', + nats_row_delimiter = '\\n'; + CREATE TABLE test.view1 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE TABLE test.view2 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS + SELECT * FROM test.nats; + CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS + SELECT * FROM test.nats; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + 
asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "mmv", messages)) + + time_limit_sec = 60 + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result1 = instance.query("SELECT * FROM test.view1 ORDER BY key") + result2 = instance.query("SELECT * FROM test.view2 ORDER BY key") + if nats_check_result(result1) and nats_check_result(result2): + break + + instance.query( + """ + DROP TABLE test.consumer1; + DROP TABLE test.consumer2; + DROP TABLE test.view1; + DROP TABLE test.view2; + """ + ) + + nats_check_result(result1, True) + nats_check_result(result2, True) + + +def test_nats_protobuf(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value String) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'pb', + nats_format = 'Protobuf', + nats_schema = 'nats.proto:ProtoKeyValue'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.nats; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + data = b"" + for i in range(0, 20): + msg = nats_pb2.ProtoKeyValue() + msg.key = i + msg.value = str(i) + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "pb", bytes=data)) + data = b"" + for i in range(20, 21): + msg = nats_pb2.ProtoKeyValue() + msg.key = i + msg.value = str(i) + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "pb", bytes=data)) + data = b"" + for i in range(21, 50): + msg = nats_pb2.ProtoKeyValue() + msg.key = i + msg.value = str(i) + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "pb", bytes=data)) + + result = "" + time_limit_sec = 60 + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result = instance.query("SELECT * FROM test.view ORDER BY key") + if nats_check_result(result): + break + + nats_check_result(result, True) + + +def test_nats_big_message(nats_cluster): + # Create batchs of messages of size ~100Kb + nats_messages = 1000 + batch_messages = 1000 + messages = [ + json.dumps({"key": i, "value": "x" * 100}) * batch_messages + for i in range(nats_messages) + ] + + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value String) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'big', + nats_format = 'JSONEachRow'; + CREATE TABLE test.view (key UInt64, value String) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.nats; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "big", messages)) + + while True: + result = instance.query("SELECT count() FROM test.view") + if int(result) == batch_messages * nats_messages: + break + + assert ( + int(result) == nats_messages * batch_messages + ), "ClickHouse lost some messages: {}".format(result) + + +def test_nats_mv_combo(nats_cluster): + NUM_MV = 5 + NUM_CONSUMERS = 4 + + instance.query( + """ + CREATE TABLE test.nats (key UInt64, 
value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'combo', + nats_num_consumers = {}, + nats_format = 'JSONEachRow', + nats_row_delimiter = '\\n'; + """.format( + NUM_CONSUMERS + ) + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + for mv_id in range(NUM_MV): + instance.query( + """ + DROP TABLE IF EXISTS test.combo_{0}; + DROP TABLE IF EXISTS test.combo_{0}_mv; + CREATE TABLE test.combo_{0} (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.combo_{0}_mv TO test.combo_{0} AS + SELECT * FROM test.nats; + """.format( + mv_id + ) + ) + + time.sleep(2) + + i = [0] + messages_num = 10000 + + def produce(): + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({"key": i[0], "value": i[0]})) + i[0] += 1 + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "combo", messages)) + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = 0 + for mv_id in range(NUM_MV): + result += int( + instance.query("SELECT count() FROM test.combo_{0}".format(mv_id)) + ) + if int(result) == messages_num * threads_num * NUM_MV: + break + time.sleep(1) + + for thread in threads: + thread.join() + + for mv_id in range(NUM_MV): + instance.query( + """ + DROP TABLE test.combo_{0}_mv; + DROP TABLE test.combo_{0}; + """.format( + mv_id + ) + ) + + assert ( + int(result) == messages_num * threads_num * NUM_MV + ), "ClickHouse lost some messages: {}".format(result) + + +def test_nats_insert(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'insert', + nats_format = 'TSV', + nats_row_delimiter = '\\n'; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + values = [] + for i in range(50): + values.append("({i}, {i})".format(i=i)) + values = ",".join(values) + + insert_messages = [] + + async def sub_to_nats(): + nc = await nats.connect( + "{}:4444".format(nats_cluster.nats_ip), user="click", password="house" + ) + sub = await nc.subscribe("insert") + await sub.unsubscribe(50) + async for msg in sub.messages: + insert_messages.append(msg.data.decode()) + + await sub.drain() + await nc.drain() + + def run_sub(): + asyncio.run(sub_to_nats()) + + thread = threading.Thread(target=run_sub) + thread.start() + time.sleep(1) + + while True: + try: + instance.query("INSERT INTO test.nats VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if "Local: Timed out." 
in str(e): + continue + else: + raise + thread.join() + + result = "\n".join(insert_messages) + nats_check_result(result, True) + + +def test_nats_many_subjects_insert_wrong(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'insert1,insert2.>,insert3.*.foo', + nats_format = 'TSV', + nats_row_delimiter = '\\n'; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + values = [] + for i in range(50): + values.append("({i}, {i})".format(i=i)) + values = ",".join(values) + + # no subject specified + instance.query_and_get_error("INSERT INTO test.nats VALUES {}".format(values)) + + # can't insert into wildcard subjects + instance.query_and_get_error( + "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert2.>' VALUES {}".format( + values + ) + ) + instance.query_and_get_error( + "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert3.*.foo' VALUES {}".format( + values + ) + ) + + # specified subject is not among engine's subjects + instance.query_and_get_error( + "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert4' VALUES {}".format( + values + ) + ) + instance.query_and_get_error( + "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert3.foo.baz' VALUES {}".format( + values + ) + ) + instance.query_and_get_error( + "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='foo.insert2' VALUES {}".format( + values + ) + ) + + +def test_nats_many_subjects_insert_right(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'right_insert1,right_insert2', + nats_format = 'TSV', + nats_row_delimiter = '\\n'; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + values = [] + for i in range(50): + values.append("({i}, {i})".format(i=i)) + values = ",".join(values) + + insert_messages = [] + + async def sub_to_nats(): + nc = await nats.connect( + "{}:4444".format(nats_cluster.nats_ip), user="click", password="house" + ) + sub = await nc.subscribe("right_insert1") + await sub.unsubscribe(50) + async for msg in sub.messages: + insert_messages.append(msg.data.decode()) + + await sub.drain() + await nc.drain() + + def run_sub(): + asyncio.run(sub_to_nats()) + + thread = threading.Thread(target=run_sub) + thread.start() + time.sleep(1) + + while True: + try: + instance.query( + "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='right_insert1' VALUES {}".format( + values + ) + ) + break + except QueryRuntimeException as e: + if "Local: Timed out." 
in str(e): + continue + else: + raise + thread.join() + + result = "\n".join(insert_messages) + nats_check_result(result, True) + + +def test_nats_many_inserts(nats_cluster): + instance.query( + """ + DROP TABLE IF EXISTS test.nats_many; + DROP TABLE IF EXISTS test.nats_consume; + DROP TABLE IF EXISTS test.view_many; + DROP TABLE IF EXISTS test.consumer_many; + CREATE TABLE test.nats_many (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'many_inserts', + nats_format = 'TSV', + nats_row_delimiter = '\\n'; + CREATE TABLE test.nats_consume (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'many_inserts', + nats_format = 'TSV', + nats_row_delimiter = '\\n'; + CREATE TABLE test.view_many (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS + SELECT * FROM test.nats_consume; + """ + ) + while not check_table_is_ready(instance, "test.nats_consume"): + logging.debug("Table test.nats_consume is not yet ready") + time.sleep(0.5) + + messages_num = 10000 + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ",".join(values) + + def insert(): + while True: + try: + instance.query("INSERT INTO test.nats_many VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if "Local: Timed out." in str(e): + continue + else: + raise + + threads = [] + threads_num = 10 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + for thread in threads: + thread.join() + + time_limit_sec = 300 + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result = instance.query("SELECT count() FROM test.view_many") + print(result, messages_num * threads_num) + if int(result) >= messages_num * threads_num: + break + time.sleep(1) + + instance.query( + """ + DROP TABLE test.nats_consume; + DROP TABLE test.nats_many; + DROP TABLE test.consumer_many; + DROP TABLE test.view_many; + """ + ) + + assert ( + int(result) == messages_num * threads_num + ), "ClickHouse lost some messages or got duplicated ones. 
Total count: {}".format( + result + ) + + +def test_nats_overloaded_insert(nats_cluster): + instance.query( + """ + DROP TABLE IF EXISTS test.view_overload; + DROP TABLE IF EXISTS test.consumer_overload; + DROP TABLE IF EXISTS test.nats_consume; + CREATE TABLE test.nats_consume (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'over', + nats_num_consumers = 5, + nats_max_block_size = 10000, + nats_format = 'TSV', + nats_row_delimiter = '\\n'; + CREATE TABLE test.nats_overload (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'over', + nats_format = 'TSV', + nats_row_delimiter = '\\n'; + CREATE TABLE test.view_overload (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key + SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3; + CREATE MATERIALIZED VIEW test.consumer_overload TO test.view_overload AS + SELECT * FROM test.nats_consume; + """ + ) + while not check_table_is_ready(instance, "test.nats_consume"): + logging.debug("Table test.nats_consume is not yet ready") + time.sleep(0.5) + + messages_num = 100000 + + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ",".join(values) + + while True: + try: + instance.query( + "INSERT INTO test.nats_overload VALUES {}".format(values) + ) + break + except QueryRuntimeException as e: + if "Local: Timed out." in str(e): + continue + else: + raise + + threads = [] + threads_num = 5 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + time_limit_sec = 300 + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result = instance.query("SELECT count() FROM test.view_overload") + time.sleep(1) + if int(result) >= messages_num * threads_num: + break + + instance.query( + """ + DROP TABLE test.consumer_overload; + DROP TABLE test.view_overload; + DROP TABLE test.nats_consume; + DROP TABLE test.nats_overload; + """ + ) + + for thread in threads: + thread.join() + + assert ( + int(result) == messages_num * threads_num + ), "ClickHouse lost some messages or got duplicated ones. 
Total count: {}".format( + result + ) + + +def test_nats_virtual_column(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats_virtuals (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'virtuals', + nats_format = 'JSONEachRow'; + CREATE MATERIALIZED VIEW test.view Engine=Log AS + SELECT value, key, _subject FROM test.nats_virtuals; + """ + ) + while not check_table_is_ready(instance, "test.nats_virtuals"): + logging.debug("Table test.nats_virtuals is not yet ready") + time.sleep(0.5) + + message_num = 10 + i = 0 + messages = [] + for _ in range(message_num): + messages.append(json.dumps({"key": i, "value": i})) + i += 1 + + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "virtuals", messages)) + + while True: + result = instance.query("SELECT count() FROM test.view") + time.sleep(1) + if int(result) == message_num: + break + + result = instance.query( + """ + SELECT key, value, _subject + FROM test.view ORDER BY key + """ + ) + + expected = """\ +0 0 virtuals +1 1 virtuals +2 2 virtuals +3 3 virtuals +4 4 virtuals +5 5 virtuals +6 6 virtuals +7 7 virtuals +8 8 virtuals +9 9 virtuals +""" + + instance.query( + """ + DROP TABLE test.nats_virtuals; + DROP TABLE test.view; + """ + ) + + assert TSV(result) == TSV(expected) + + +def test_nats_virtual_column_with_materialized_view(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats_virtuals_mv (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'virtuals_mv', + nats_format = 'JSONEachRow'; + CREATE TABLE test.view (key UInt64, value UInt64, subject String) ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT *, _subject as subject + FROM test.nats_virtuals_mv; + """ + ) + while not check_table_is_ready(instance, "test.nats_virtuals_mv"): + logging.debug("Table test.nats_virtuals_mv is not yet ready") + time.sleep(0.5) + + message_num = 10 + i = 0 + messages = [] + for _ in range(message_num): + messages.append(json.dumps({"key": i, "value": i})) + i += 1 + + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "virtuals_mv", messages)) + + while True: + result = instance.query("SELECT count() FROM test.view") + time.sleep(1) + if int(result) == message_num: + break + + result = instance.query("SELECT key, value, subject FROM test.view ORDER BY key") + expected = """\ +0 0 virtuals_mv +1 1 virtuals_mv +2 2 virtuals_mv +3 3 virtuals_mv +4 4 virtuals_mv +5 5 virtuals_mv +6 6 virtuals_mv +7 7 virtuals_mv +8 8 virtuals_mv +9 9 virtuals_mv +""" + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + DROP TABLE test.nats_virtuals_mv + """ + ) + + assert TSV(result) == TSV(expected) + + +def test_nats_many_consumers_to_each_queue(nats_cluster): + instance.query( + """ + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + """ + ) + + num_tables = 4 + for table_id in range(num_tables): + print(("Setting up table {}".format(table_id))) + instance.query( + """ + DROP TABLE IF EXISTS test.many_consumers_{0}; + DROP TABLE IF EXISTS test.many_consumers_{0}_mv; + CREATE TABLE test.many_consumers_{0} (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'many_consumers', + nats_num_consumers = 2, + nats_queue_group = 'many_consumers', + nats_format = 'JSONEachRow', + nats_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW 
test.many_consumers_{0}_mv TO test.destination AS + SELECT key, value FROM test.many_consumers_{0}; + """.format( + table_id + ) + ) + while not check_table_is_ready( + instance, "test.many_consumers_{}".format(table_id) + ): + logging.debug( + "Table test.many_consumers_{} is not yet ready".format(table_id) + ) + time.sleep(0.5) + + i = [0] + messages_num = 1000 + + def produce(): + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({"key": i[0], "value": i[0]})) + i[0] += 1 + asyncio.run( + nats_produce_messages(nats_cluster.nats_ip, "many_consumers", messages) + ) + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + result1 = "" + while True: + result1 = instance.query("SELECT count() FROM test.destination") + time.sleep(1) + if int(result1) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + for consumer_id in range(num_tables): + instance.query( + """ + DROP TABLE test.many_consumers_{0}; + DROP TABLE test.many_consumers_{0}_mv; + """.format( + consumer_id + ) + ) + + instance.query( + """ + DROP TABLE test.destination; + """ + ) + + assert ( + int(result1) == messages_num * threads_num + ), "ClickHouse lost some messages: {}".format(result1) + + +def test_nats_restore_failed_connection_without_losses_on_write(nats_cluster): + instance.query( + """ + DROP TABLE IF EXISTS test.consume; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE TABLE test.consume (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'producer_reconnect', + nats_format = 'JSONEachRow', + nats_num_consumers = 2, + nats_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.consume; + DROP TABLE IF EXISTS test.producer_reconnect; + CREATE TABLE test.producer_reconnect (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'producer_reconnect', + nats_format = 'JSONEachRow', + nats_row_delimiter = '\\n'; + """ + ) + while not check_table_is_ready(instance, "test.consume"): + logging.debug("Table test.consume is not yet ready") + time.sleep(0.5) + + messages_num = 100000 + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ",".join(values) + + while True: + try: + instance.query( + "INSERT INTO test.producer_reconnect VALUES {}".format(values) + ) + break + except QueryRuntimeException as e: + if "Local: Timed out." 
in str(e): + continue + else: + raise + + while int(instance.query("SELECT count() FROM test.view")) == 0: + time.sleep(0.1) + + kill_nats(nats_cluster.nats_docker_id) + time.sleep(4) + revive_nats(nats_cluster.nats_docker_id, nats_cluster.nats_ip) + + while True: + result = instance.query("SELECT count(DISTINCT key) FROM test.view") + time.sleep(1) + if int(result) == messages_num: + break + + instance.query( + """ + DROP TABLE test.consume; + DROP TABLE test.producer_reconnect; + """ + ) + + assert int(result) == messages_num, "ClickHouse lost some messages: {}".format( + result + ) + + +def test_nats_no_connection_at_startup_1(nats_cluster): + # no connection when table is initialized + nats_cluster.pause_container("nats1") + instance.query_and_get_error( + """ + CREATE TABLE test.cs (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'cs', + nats_format = 'JSONEachRow', + nats_num_consumers = '5', + nats_row_delimiter = '\\n'; + """ + ) + nats_cluster.unpause_container("nats1") + + +def test_nats_no_connection_at_startup_2(nats_cluster): + instance.query( + """ + CREATE TABLE test.cs (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'cs', + nats_format = 'JSONEachRow', + nats_num_consumers = '5', + nats_row_delimiter = '\\n'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.cs; + """ + ) + + instance.query("DETACH TABLE test.cs") + nats_cluster.pause_container("nats1") + instance.query("ATTACH TABLE test.cs") + nats_cluster.unpause_container("nats1") + while not check_table_is_ready(instance, "test.cs"): + logging.debug("Table test.cs is not yet ready") + time.sleep(0.5) + + messages_num = 1000 + messages = [] + for i in range(messages_num): + messages.append(json.dumps({"key": i, "value": i})) + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "cs", messages)) + + for _ in range(20): + result = instance.query("SELECT count() FROM test.view") + time.sleep(1) + if int(result) == messages_num: + break + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.cs; + """ + ) + + assert int(result) == messages_num, "ClickHouse lost some messages: {}".format( + result + ) + + +def test_nats_format_factory_settings(nats_cluster): + instance.query( + """ + CREATE TABLE test.format_settings ( + id String, date DateTime + ) ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'format_settings', + nats_format = 'JSONEachRow', + date_time_input_format = 'best_effort'; + """ + ) + while not check_table_is_ready(instance, "test.format_settings"): + logging.debug("Table test.format_settings is not yet ready") + time.sleep(0.5) + + message = json.dumps( + {"id": "format_settings_test", "date": "2021-01-19T14:42:33.1829214Z"} + ) + expected = instance.query( + """SELECT parseDateTimeBestEffort(CAST('2021-01-19T14:42:33.1829214Z', 'String'))""" + ) + + asyncio.run( + nats_produce_messages(nats_cluster.nats_ip, "format_settings", [message]) + ) + + while True: + result = instance.query("SELECT date FROM test.format_settings") + if result == expected: + break + + instance.query( + """ + CREATE TABLE test.view ( + id String, date DateTime + ) ENGINE = MergeTree ORDER BY id; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.format_settings; + """ + ) + + asyncio.run( + nats_produce_messages(nats_cluster.nats_ip, "format_settings", 
[message]) + ) + while True: + result = instance.query("SELECT date FROM test.view") + if result == expected: + break + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.format_settings; + """ + ) + + assert result == expected + + +def test_nats_bad_args(nats_cluster): + instance.query_and_get_error( + """ + CREATE TABLE test.drop (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_secure = true, + nats_format = 'JSONEachRow'; + """ + ) + + +def test_nats_drop_mv(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS + SETTINGS nats_url = 'nats1:4444', + nats_subjects = 'mv', + nats_format = 'JSONEachRow'; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.nats; + """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + messages = [] + for i in range(20): + messages.append(json.dumps({"key": i, "value": i})) + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "mv", messages)) + + instance.query("DROP VIEW test.consumer") + messages = [] + for i in range(20, 40): + messages.append(json.dumps({"key": i, "value": i})) + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "mv", messages)) + + instance.query( + """ + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.nats; + """ + ) + messages = [] + for i in range(40, 50): + messages.append(json.dumps({"key": i, "value": i})) + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "mv", messages)) + + while True: + result = instance.query("SELECT * FROM test.view ORDER BY key") + if nats_check_result(result): + break + + nats_check_result(result, True) + + instance.query("DROP VIEW test.consumer") + messages = [] + for i in range(50, 60): + messages.append(json.dumps({"key": i, "value": i})) + asyncio.run(nats_produce_messages(nats_cluster.nats_ip, "mv", messages)) + + count = 0 + while True: + count = int(instance.query("SELECT count() FROM test.nats")) + if count: + break + + assert count > 0 + + +def test_nats_predefined_configuration(nats_cluster): + instance.query( + """ + CREATE TABLE test.nats (key UInt64, value UInt64) + ENGINE = NATS(nats1) """ + ) + while not check_table_is_ready(instance, "test.nats"): + logging.debug("Table test.nats is not yet ready") + time.sleep(0.5) + + asyncio.run( + nats_produce_messages( + nats_cluster.nats_ip, "named", [json.dumps({"key": 1, "value": 2})] + ) + ) + while True: + result = instance.query( + "SELECT * FROM test.nats ORDER BY key", ignore_error=True + ) + if result == "1\t2\n": + break + + +if __name__ == "__main__": + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() diff --git a/tests/integration/test_storage_nats/test_nats_json.reference b/tests/integration/test_storage_nats/test_nats_json.reference new file mode 100644 index 00000000000..959bb2aad74 --- /dev/null +++ b/tests/integration/test_storage_nats/test_nats_json.reference @@ -0,0 +1,50 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 +32 32 +33 33 +34 34 +35 35 +36 36 +37 37 +38 38 +39 39 +40 40 +41 41 +42 42 +43 43 +44 44 +45 45 +46 46 +47 47 +48 48 +49 49 diff --git 
a/tests/integration/test_storage_postgresql/configs/settings.xml b/tests/integration/test_storage_postgresql/configs/settings.xml new file mode 100644 index 00000000000..7054c274771 --- /dev/null +++ b/tests/integration/test_storage_postgresql/configs/settings.xml @@ -0,0 +1,8 @@ + + + + + 1 + + + diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 1fc0475419c..a3ebbe97451 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -10,7 +10,10 @@ node1 = cluster.add_instance( "node1", main_configs=["configs/named_collections.xml"], with_postgres=True ) node2 = cluster.add_instance( - "node2", main_configs=["configs/named_collections.xml"], with_postgres_cluster=True + "node2", + main_configs=["configs/named_collections.xml"], + user_configs=["configs/settings.xml"], + with_postgres_cluster=True, ) @@ -19,6 +22,7 @@ def started_cluster(): try: cluster.start() node1.query("CREATE DATABASE test") + node2.query("CREATE DATABASE test") yield cluster finally: @@ -640,6 +644,55 @@ def test_uuid(started_cluster): assert result.strip() == "Nullable(UUID)" +def test_auto_close_connection(started_cluster): + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=False + ) + cursor = conn.cursor() + database_name = "auto_close_connection_test" + + cursor.execute(f"DROP DATABASE IF EXISTS {database_name}") + cursor.execute(f"CREATE DATABASE {database_name}") + conn = get_postgres_conn( + started_cluster.postgres_ip, + started_cluster.postgres_port, + database=True, + database_name=database_name, + ) + cursor = conn.cursor() + cursor.execute("CREATE TABLE test_table (key integer, value integer)") + + node2.query( + f""" + CREATE TABLE test.test_table (key UInt32, value UInt32) + ENGINE = PostgreSQL(postgres1, database='{database_name}', table='test_table') + """ + ) + + result = node2.query( + "INSERT INTO test.test_table SELECT number, number FROM numbers(1000)", + user="default", + ) + + result = node2.query("SELECT * FROM test.test_table LIMIT 100", user="default") + + node2.query( + f""" + CREATE TABLE test.stat (numbackends UInt32, datname String) + ENGINE = PostgreSQL(postgres1, database='{database_name}', table='pg_stat_database') + """ + ) + + count = int( + node2.query( + f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'" + ) + ) + + # Connection from python + pg_stat table also has a connection at the moment of current query + assert count == 2 + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 64f104dd4ff..18b1e9d974b 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -30,6 +30,11 @@ instance = cluster.add_instance( stay_alive=True, ) +instance2 = cluster.add_instance( + "instance2", + user_configs=["configs/users.xml"], + with_rabbitmq=True, +) # Helpers @@ -2782,3 +2787,47 @@ def test_rabbitmq_msgpack(rabbitmq_cluster): break time.sleep(1) assert result.strip() == "kek" + + instance.query("drop table rabbit_in sync") + instance.query("drop table rabbit_out sync") + + +def test_rabbitmq_address(rabbitmq_cluster): + + instance2.query( + """ + drop table if exists rabbit_in; + drop table if exists rabbit_out; + create table + rabbit_in (val String) + engine=RabbitMQ + 
SETTINGS rabbitmq_exchange_name = 'rxhep', + rabbitmq_format = 'CSV', + rabbitmq_num_consumers = 1, + rabbitmq_address='amqp://root:clickhouse@rabbitmq1:5672/'; + create table + rabbit_out (val String) engine=RabbitMQ + SETTINGS rabbitmq_exchange_name = 'rxhep', + rabbitmq_format = 'CSV', + rabbitmq_num_consumers = 1, + rabbitmq_address='amqp://root:clickhouse@rabbitmq1:5672/'; + set stream_like_engine_allow_direct_select=1; + insert into rabbit_out select 'kek'; + """ + ) + + result = "" + try_no = 0 + while True: + result = instance2.query("select * from rabbit_in;") + if result.strip() == "kek": + break + else: + try_no = try_no + 1 + if try_no == 20: + break + time.sleep(1) + assert result.strip() == "kek" + + instance2.query("drop table rabbit_in sync") + instance2.query("drop table rabbit_out sync") diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index ec7c746c549..5dd09ddd362 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1052,61 +1052,61 @@ def test_seekable_formats(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')" - instance.query( - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000) settings s3_truncate_on_insert=1" + exec_query_with_retry( + instance, + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", ) result = instance.query(f"SELECT count() FROM {table_function}") - assert int(result) == 5000000 + assert int(result) == 1000000 table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" exec_query_with_retry( instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000) settings s3_truncate_on_insert=1", + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", ) - result = instance.query(f"SELECT count() FROM {table_function}") - assert int(result) == 5000000 + result = instance.query( + f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='50M'" + ) + assert int(result) == 1000000 + + instance.query(f"SELECT * FROM {table_function} FORMAT Null") instance.query("SYSTEM FLUSH LOGS") result = instance.query( - f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM s3') AND memory_usage > 0 ORDER BY event_time desc" + f"SELECT formatReadableSize(ProfileEvents['ReadBufferFromS3Bytes']) FROM system.query_log WHERE startsWith(query, 'SELECT * FROM s3') AND memory_usage > 0 AND type='QueryFinish' ORDER BY event_time_microseconds DESC LIMIT 1" ) - + result = result.strip() + assert result.endswith("MiB") result = result[: result.index(".")] - assert int(result) < 200 + assert int(result) > 80 def test_seekable_formats_url(started_cluster): bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy"] + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')" - instance.query( - f"insert into table function {table_function} select number, randomString(100) from numbers(5000000) settings s3_truncate_on_insert=1" + exec_query_with_retry( + instance, + f"insert into table function 
{table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", ) - table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_parquet', 'Parquet', 'a Int32, b String')" result = instance.query(f"SELECT count() FROM {table_function}") - assert int(result) == 5000000 + assert int(result) == 1000000 table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" exec_query_with_retry( instance, - f"insert into table function {table_function} select number, randomString(100) from numbers(5000000) settings s3_truncate_on_insert=1", + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", ) - table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_orc', 'ORC', 'a Int32, b String')" - result = instance.query(f"SELECT count() FROM {table_function}") - assert int(result) == 5000000 - - instance.query("SYSTEM FLUSH LOGS") + table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_parquet', 'Parquet', 'a Int32, b String')" result = instance.query( - f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM url') AND memory_usage > 0 ORDER BY event_time desc" + f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='50M'" ) - - result = result[: result.index(".")] - assert int(result) < 200 + assert int(result) == 1000000 def test_empty_file(started_cluster): diff --git a/tests/integration/test_tlsv1_3/__init__.py b/tests/integration/test_tlsv1_3/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_tlsv1_3/certs/ca-cert.pem b/tests/integration/test_tlsv1_3/certs/ca-cert.pem new file mode 100644 index 00000000000..293e1c7f564 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/ca-cert.pem @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIFhTCCA22gAwIBAgIUVRNcr0jCH3vSTxg8QYQH6CCtyF4wDQYJKoZIhvcNAQEL +BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjIwMjE4 +MDk0MzA2WhcNMzIwMjE2MDk0MzA2WjBSMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQsw +CQYDVQQDDAJjYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALwojNvu +fXQYQ4tucqNOEDHf2sNgxwxqY6QdtJ+zNfVjsK4I3Vqo8TtzxfDYGolkYem/bYJM +xQar9ehUm9ok/0kJgIo8vDXxxDJtvjz5Fd5oFWJLMxojLE9NSa0A4m18jGfbFNsF +XoU0njiInyzNaU9d4bMpaweseCZdt9Y4LR93FkuhSU/v18lWQPob8SSIij059IZP +sEUxpDOTxclAmG/Knd/6v3ecVFiQgexZM0gCtf7kcw41mxsAaP/mOexodIZDR70Y +LYjL7R2ZGhpClfQc8SO5NSpfEqsfreDX7XoaCTsy7/rqr3Nfiby6sc//awG0Ww/f +FRf2+2BU2xEwOVa3i5wU5raYY6eqFLK9q9c2IWPSqYzAmvhK2pqWQ/iaCU/Q89ow +SbKudJTLK8Y6v9LW4Q8ZLZF+CzS5cI+QEfIYqTLFdInH1BLoxx7cymEv07CDkcTo +2WtV8GdMph2P3U/9NoXQDonjCSj0lQUjgUdcrBPaIIVbIn6/5vfw8LQa8PoGDhIx +AYQkqPR+LHxCqIMzdqKZ+OXD/HPhiigpxLhF7mVRLvvoyrOZVJbcu1qmgCcQw0IE +fWzvWne+9cYC9lgt8+/k6d6B1uhYsIwwhgoj0dffFjc0sF6zfceGK+H1K2JCE0aY +zT1HlvSoZdA7lEs5xbGJnkBHqlOvQ63ynXCzAgMBAAGjUzBRMB0GA1UdDgQWBBTn +AtgFU20JF7kTZCKlY7/hi0kYRzAfBgNVHSMEGDAWgBTnAtgFU20JF7kTZCKlY7/h +i0kYRzAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQCpiWgJ1XUw +a8Bdeznsa57oy+5mqQZWpRVkzTQRHEGV850OGh7WQ6u9kVAHefaHH9hsVxyggton +6/MDsu4KL5jqKmJaIAepPIOw6DTc2zs044I7W/rxRp+w1hL2TS+EahMrSPwdzCcl +NNAM0dXocGylf6qwwMqiYAR1K3UIrlyq4QTr1oEPIqJBkDg1JDYrt4T2DroPjW20 +5hlCQ/tft5ddGL0EFEaKWwAcPFm7jAwJiz2eUqmT6PcmaZ24qPn5RXVkaBAkrSga 
+1WgM8r3LGu2EKhdiDc5hRJKjS8RZyLvZNNzlL3+N42nGmGZkND5bV6u82OD+qn17 +LRZOt0Cr70HqszSYk/67ijjaa4n/fuuAqorV+yYB8accRXtoi00nxykT+H+yI1rD +swvcrfDvhUgY5zmunWyQUYh0q/2Hj75GbLup3Cd0B4MrBwqyCqcEugM4OSf6aRMr +e/vjeggTVPN08xE1LUkugalx0B0aoO6qFahJ2CmkAcYLLlS2N+F7TMuPavc0kVxD +I3qA5G9zvNCliSLX2+kM+LzslI8+pP/A98bvh6nW4HtZkI0jq1ks7XR0GeOhCI8E +0l/YuElxxgKhN4INKhhMoDKqPib4z8gbmkenR2CenQCpfLMIrhTXZgtw+gvEgpIE +/QK97G8XPqga6zn471wrYJnuyJli+sP7aw== +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/ca-cert.srl b/tests/integration/test_tlsv1_3/certs/ca-cert.srl new file mode 100644 index 00000000000..c02cd0a4526 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/ca-cert.srl @@ -0,0 +1 @@ +05F10C67567FE30795D77AF2540F6AC8D4CF2461 diff --git a/tests/integration/test_tlsv1_3/certs/ca-key.pem b/tests/integration/test_tlsv1_3/certs/ca-key.pem new file mode 100644 index 00000000000..e85dca8553e --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/ca-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQC8KIzb7n10GEOL +bnKjThAx39rDYMcMamOkHbSfszX1Y7CuCN1aqPE7c8Xw2BqJZGHpv22CTMUGq/Xo +VJvaJP9JCYCKPLw18cQybb48+RXeaBViSzMaIyxPTUmtAOJtfIxn2xTbBV6FNJ44 +iJ8szWlPXeGzKWsHrHgmXbfWOC0fdxZLoUlP79fJVkD6G/EkiIo9OfSGT7BFMaQz +k8XJQJhvyp3f+r93nFRYkIHsWTNIArX+5HMONZsbAGj/5jnsaHSGQ0e9GC2Iy+0d +mRoaQpX0HPEjuTUqXxKrH63g1+16Ggk7Mu/66q9zX4m8urHP/2sBtFsP3xUX9vtg +VNsRMDlWt4ucFOa2mGOnqhSyvavXNiFj0qmMwJr4StqalkP4mglP0PPaMEmyrnSU +yyvGOr/S1uEPGS2Rfgs0uXCPkBHyGKkyxXSJx9QS6Mce3MphL9Owg5HE6NlrVfBn +TKYdj91P/TaF0A6J4wko9JUFI4FHXKwT2iCFWyJ+v+b38PC0GvD6Bg4SMQGEJKj0 +fix8QqiDM3aimfjlw/xz4YooKcS4Re5lUS776MqzmVSW3LtapoAnEMNCBH1s71p3 +vvXGAvZYLfPv5OnegdboWLCMMIYKI9HX3xY3NLBes33Hhivh9StiQhNGmM09R5b0 +qGXQO5RLOcWxiZ5AR6pTr0Ot8p1wswIDAQABAoICAQCO/c4Wccb7TFlAhD4wpumd +zX5GDq0WXV+94CldWGdARnOFvwzhkhRJ1zDtWH3KPfQ/HJBPfqIY8OQfnPUYMhej +3MnHxGJQKJyuqkHxumYJMFZX7cg3K9XHqne8NzjcddOKNa9Cx3DOkG9RjVpSRQSs +IS+d5XMGUOa6WWyVKvn3uJvD/B1n12DJDHiy2jtHRVCxOPMAg1z1KMWdwMaFrEZs +ZrHV/ow1jSN4btGd2SgkqJLA08IwYUKvoX8qQj9wzu0G/+hr5wzrsfZQEQMKQ+IL +s1b6jAzAV6IrVBbjEZXSviiXyZ0gteuCJW/acpMg+/3JPNQbWrCAFt1wluwowto/ +JAFIvlh29hfE5c+HEMpQNa0tdj7jepBn/0YEbgwpayMikKiLZXEpgheWCGypAQWp +Hm+N0Ym7HSGe82obxi8EjKRnNwFUtotWzUBKeo9aFwPZHLFlspljd+5ynDvKqXnk +txYZj6K3TtMs30HAG6fqxSPyiZ5W+5yF7nt6qLODs6m4Os+lrk1GnoqC0/uLMzIU +CRJKulrJOK4/Z2tPn9IAhcREbS4oROUeNqqo0Cfs3ssvkV7JTHF4IsKhCmElMmGa +bevOI+pvdjfECShy0Jnbtni6ece/II4/edfUp9kWN45xZLpzDjfqCVD66JS9g6ZU +i/EVll+d5zaI2TzzwZgHUQKCAQEA3d8siwXbq7x0cAB013+tvkvGMJ2EuS1TWdLk +a2P6CAnlZMWvv2cPSd2WpimHjqKxrbn6VE79mOc2l9Y1NOUUWWZATrhN7V8xMapQ +0YiYCHeaMERUAUKdzCgRN2/mRbZCBzpPBbWbb6NtKfRFJsD9zAe2JBwDVh9hvAL8 +YVBoczrEfj1ILnmtPhAJVI6s6rDsA4MgKjLs0Tt7Cc7rQxqNSpHEvwv1yLQmjp0N +L5b1TEt7fqVJ9dirykJquBYEKf55Z1qZhQzmnbu9OPnzeqGDakl5F/UsXDB5Bokp +ilcV+nFbh175Q+gTEhaSacGW8gzRw6j18PuciBjeWVEM5hhxOwKCAQEA2RnRMjv9 +46jQarJTFbIHg1SqrR87GSLnt6672M5TX9frzxMCuVDjKgdecstvLjm6X+/cPQKT +Q3javJnJXT4cx//1J7RLO6ZBVSCZf3//XntdHdFVJf5ySQtK+MJyfxjpzP6KBPfb +WPrva8p29ejbBdtsOT0M6gY5tPfadU2XEaf+BoyX9NUmu1U46Iqi+eCOjR+GVvhP +pJzGgLeOsaRVCfc9I7XPoVu3AEx5Kt55yRYm4fyGPsAd+mRDbIXMXdL0k8CfWWDr +8TT5rqKI+gFPFQCwToBW3DwHIGY+3RmoXFfQ0IJaKwOk4AB7m6HC3mv1crtjTFSM +9p74oQzNX7UG6QKCAQBEs2cygRTdH5SaXbnQRKvC4emzggLn5/4IMUIjcqioNpA+ +XOwngzz7rU6JkxBzfTMxTQYTdwYVg3qnF2AQSeK8L+o3teADYVd1PnyZ9QbGkGpB +CddNMJh17+4s0UxnR6E4Zbi0VuCTd/JEbGvBLT8pHzYqBjaOQ1dbBT2q0GAXVhoj +0Mv6ABlBv2t0MF2gqjnaeI7MIkqsGxPlHJpChAU+EtbuJUDs7cOGo2DC3KaGAlVy +CLJXGslO7rPm3oJZkn97HlWtGiqKquhTrSnUThDIJ4oEfhlHTocbG/ut53tZuiIS 
+T7k1arYFAtJBRv17Y7bMNBQ7k12L0s9+rpck5GqjAoIBAQCVBPSkj6tZbpII+viu +5rHjguVYyhwtx9jYK1eDnTR7kGGrlPgErjIPslkxYNSjHTsCCUnakv70jGtQlBs1 +JqJo4hesNkSB4D/uJ99VNk3a08D566uP1dUqsFa44/flp/ssG/gvKtbkf/KBwcrg +RwK4RYJG09IefUF1J8BLToQIuZBTfIP9qaXZZskWTbtK28ndsqrq3a0FaBuVVOnc +o9k/avcLoQuxTZwS12tAcs+TqOHtswGO5x5stg/V2Q2LxXbeSJTYq/+oZN2R8r0l +JmrbFsruR4fXylh189jouWjoYdrSlPdBmVG99HbkQCfbtq0XIOsrBMpxqnMtUPVT +4ZWpAoIBAQCrao4XHpRM3KsfPqdkg0vqFDBA+LzKlWu1fl8w5TGpFK8H1tv5kHNv +h0XmeU5cXwiweri3KjQz7h+qVBHZeAvyrEoxJQdImax+5OUy7lysDs+SL02gLZb3 +Z7g+u4Buwx+cv4O7hwUEDDk/5X3NBFk7/iwztKUtM+Fl8jt9K3K24s87xXp9YevI +UEawden9jVcuXQjEUrwz8cjoz/y25vK5pQ6k82PVImkMZK99/PmgmGyOl7hdRA3F +ff0Kb8pRGmV/cWRKzHaC8QchW8jdU2EGxMkrFl1DvoVKLbyDf1glRncKP9iozHAR ++s184IJCUvyMxH83uKKAiBGaDRC+Lbm7 +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/client1-cert.pem b/tests/integration/test_tlsv1_3/certs/client1-cert.pem new file mode 100644 index 00000000000..bd6eea62094 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client1-cert.pem @@ -0,0 +1,30 @@ +-----BEGIN CERTIFICATE----- +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRfMA0GCSqGSIb3DQEBCwUAMFIx +CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDIxODA5NDMw +OVoXDTMyMDIxNjA5NDMwOVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE +AwwHY2xpZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMBU0fao +RrITeF4kpN81p7qirX/Gc56+Cux6u7RF1O6WU9v+V5jLw8chQZ87z4QSrFiT1ZnT +pwWYPwJ+pDk6AWEoiKuOaceOh0bjZCuxADHs+qQrye5D8GXvyFvWE2cT1pD5JNEZ +DSl2YHqNs4uTGRP9BP817iRDcuvdxpanaWxfXGfehJRMiEVgKDs+RUpoW4aVNivI +InrUWc4RXXkzaJKqhpCU3jAJBV4jSD5ZnA8PUfcoAj6z6T3I6phuDfRP5ldA3br8 +yg0hCB7Y5QrO5lRAgEoIuNnC+U6/AIwWPI36Rjiwg3EUwI/BIiL4AWjzkjSdr0mn +zyHPRk4pcn01T0GTpQi6tfZZpumDD3LkPuEy9svMpJ8ntqDnAsIJVjbg1S60hHes +yYHoQw1HxU0vrncxwcQkVaPLx0uGlioaLlvu83AVnWXbylZXsV/pLy6dE3H51GBF +DX3Zj6nkuJitk8/hNp440/Lve7SaKFPo5NdH+8ACWGdFdz3zxgPuhBDoxEeqj4c1 +FQA1ABXx2akW3lQ5VxTAg5AYORvVhJTozosr+Kn3MlRdZjl94tnVByD8MGLLE0C4 +L/qXR/IlbkOCz5LHapdC5j62ZEBwiElmMO/tMGl4ORV9tdTBrRZ9DMmKek2E8Qwz +y770PGkhp1cTzZt6UfZEympowmfjtiZfHIq1AgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAHwRpqnpcD3EW588GSDZhZrVf3nS9M06ljQGtDUqNSI4XJp1cVT1sMaa4LjM +cWgHtayFw+jbDLwioXHjMlV+8tERH+0x+qsADG349caDYT/OF13v/jyuboUZ9AqE +KpfOQH7jCLU7rEbEl6kvT3F3xaHJg8mE7msyRFfguB2JrqZkKIj4HANxJUJo4PwB +5bq9nE3AVNAgUeQEwfu0r5SjroNpcHfm7xWqMK2mDMCsy/DvI7n97Q7vZajcTT0x +UXfgx+3CLEvLMpa2myE5OIMOeLzfZwxrxyNH7BdZsROnkGv1cX+9HZpYcue/UDxp +P2OApbTuZKaTJOyMADc17s0seE0DTAHnHAWrJwVhf8wYKKtEs+i+Sw5LNSkh5fgS +hTzGF93yClDYzWEqMSKhKPeimtpz4ZBNuGf471KbpVbUKJJvJmOxqoZ5S0kpFILL +YMALf652uf5or5d0cDNvcJTwvMi6evchIV17d/jH+MxyJQs9VCkMpJxFbMrXb3YB +b57K3Z25P6w3Qfj4zuKQFANari7Gs6qSiaUBiEhEdTQlGspkq+FLndtX818sbMk5 +LAK6JaUH0ywV2jn5XSW0irQLDXqb6Q0bSyw6pdpDjk0o4UW67JCE4kGagRDnfSqL +ZODvO/dEtVLyAsjmOx8MkqLyseI7VESVd8eiJAyL0sifh+/E +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/client1-key.pem b/tests/integration/test_tlsv1_3/certs/client1-key.pem new file mode 100644 index 00000000000..8bc1e656566 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client1-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDAVNH2qEayE3he +JKTfNae6oq1/xnOevgrseru0RdTullPb/leYy8PHIUGfO8+EEqxYk9WZ06cFmD8C +fqQ5OgFhKIirjmnHjodG42QrsQAx7PqkK8nuQ/Bl78hb1hNnE9aQ+STRGQ0pdmB6 +jbOLkxkT/QT/Ne4kQ3Lr3caWp2lsX1xn3oSUTIhFYCg7PkVKaFuGlTYryCJ61FnO 
+EV15M2iSqoaQlN4wCQVeI0g+WZwPD1H3KAI+s+k9yOqYbg30T+ZXQN26/MoNIQge +2OUKzuZUQIBKCLjZwvlOvwCMFjyN+kY4sINxFMCPwSIi+AFo85I0na9Jp88hz0ZO +KXJ9NU9Bk6UIurX2Wabpgw9y5D7hMvbLzKSfJ7ag5wLCCVY24NUutIR3rMmB6EMN +R8VNL653McHEJFWjy8dLhpYqGi5b7vNwFZ1l28pWV7Ff6S8unRNx+dRgRQ192Y+p +5LiYrZPP4TaeONPy73u0mihT6OTXR/vAAlhnRXc988YD7oQQ6MRHqo+HNRUANQAV +8dmpFt5UOVcUwIOQGDkb1YSU6M6LK/ip9zJUXWY5feLZ1Qcg/DBiyxNAuC/6l0fy +JW5Dgs+Sx2qXQuY+tmRAcIhJZjDv7TBpeDkVfbXUwa0WfQzJinpNhPEMM8u+9Dxp +IadXE82belH2RMpqaMJn47YmXxyKtQIDAQABAoICAAEBsKOg19XgwjWD7ZT5e+o/ +JbdQe5RuHDKGperYnres871oBF9ZWan2I5jIwFpJmrtP8sM+V1ZxKItDzGo8QnuW +sbhsI2OW/GBDmmecIosgWWN4kzL7CgwOiDbq1OkqMmpJ04aAohAAfZrGmRT27R+s +qFUJnDh2XeicHYj2UVfu29XzVTBNgj0StsMwnT45c5ktuL3b60pHSD0K3DlhKn/y +AohJLyyDL5MBjkQ9RdLSWrR3ciOP332iSpAHq20G6ga04TQ0VH5jGN7IddJrqMry +F3nLt+Pz4EgoOcGB8Ekx8SIk0ltKJ4PZF+uk7qT0+WPrG1rAVRYxNoX8M4wyNjr4 +TcAZsV2DnGdnp+2u0SSzMczeop5hPTJKxaLaPw1JOoIk5fqW94MbEHqGnEXEIN+D +OWeUKWZ/B1YubavOeR+c3STZrh2SgmhKk6g5NMFlfnyvolPu47H8NOrewOhVG+TZ +gsQoGxSyOXwZTQ/Jd6Yg9lek8nKJBc4Res7ia/x3H+gjjRoNFI+L2HQnWztx5YMZ +H9M6hcpclZubO/w4iLq9OB2QUHn7aIT3lWRV/xS0Yh2zGCufasaMA1KSKC5zq0Fk +gCzAkYDq/ymrJs3LQQ0wegKd1akL4z5fxmXTn2v2BGoEd52uuxhL0mM/9zzRxdR2 +IsOgAym+siLXMCHTDbdVAoIBAQDuMcea66WKidS+A9frCEsabYccKzrdMEhs6Mle +orFieMC+3ZpzFIBkXPZ522I+M4nIdBKuRw9PnYTE5t30euOj60Oq905j2a+Ho4ki +kW6dC+tNDF49Hqxn9e99xbvTUi97dREcERlHA+AnRektEciyD17bi88aUy9w83Mw +G5Z+ej+9o40w8+TDopE2SIJhUAHR6LOAMq1v5y1lmTn0sbTuxZFLA0qWX9aGLi+T +4RD0MzJAtKJDbr3yPTLHAXmaMSKHhWYYgWTH9iwEhGQAm5VJy3oNJUkM7ej7Yfs7 +aTDOk61egCKhEHdWavP68MqmNOPHgnq4/edmvQnhfKtI8SMnAoIBAQDOtWDi/OnU +ZjZPnmJwwoPuXe6IjYg47bFRGv94xEpSesCAYdXNaNLPl0f/Ut9y3nXr+j+XqJWo +UqtRGFu2i9lUK3cu90GLXEaLbYWGcgL8YnJu0senLxkqxPWcGxoKmbo3xMjqk/pF +EVZ5e1qqVTlrB4q7QWmLKrS8YlcaTnChPeSBRFfryg/xvQ11Hxtq89SKkTH4ps16 +0KtiCxvfQHVASyRLIKLdyabPInB+yP3Fsn4BIx8jGtOQ/OCY01TXq9OyaRu2hJTk +qsjOLnqf6huM2so3X0Tw8AdgNoF96JJvfhwiPI5CSo9UKjhuvus1Ip5ZFFNo4Ngy +n3Zlgp1HxZzDAoIBAQC9ffqmo3sxqI8Hj3UxdIqS/rlyzm1o0+V6RwMT92gYx6nG +7fLWRGQT8+TdcotIoqWlQ7oszTlABDdAkc3XlgANQre1hkLlqqM6y/3n8zzFUVsj +E4jRJNrRZdTeAPV4mzRNCgfPhUbPuSSU+cgT48b+6L10+VeMQMtIF1T226uw+L5G +tps3a3/9pxHQ1oRquESKYo6SmT5i/M2fuvNhWBJxtdjtjTPER4AZhRqykWV0cFo1 +Ib7I2Ivh74+6w9Ciux4WJCjhq+aqMYw5F72awitU5rw1QwlHcOldO0irrfZ3EQLm +YBesfLYDmNh6NR9ydDcVXBcXnl593DvFF/IH+FYXAoIBAQCQZydLCzHy3oC8eEH+ +0fRGljooDO+IDYzcwwaLgF0HZ5eJWE97EuqKeP2kAWn2HjC07Hp2YSBDmZTyrxiK +2wG1CjRVjAeu6oShrJ4mAQnS9JdKkldFlOJ4/WUza79yflgX05IkRcIFdAo8DY+W +BLl66qbhD95CiU//dpew2fFWwx0ZrPvazar7zn1TP6rwuWvWbX5CXYyYaqP/dxE+ +khIXGyc8kI0WcWPlugJqn9CgxoO+GaIL7Ra1Z+MjACd6DyBxt3nTtKUrZZ+oYdHq +Wypp6QJxUk2gH56XeRxXMBz0ZF4VEMa0ys98FY6c1yULVqbWRhvK3aBLJRkZ6vgj +BorvAoIBAASy89mnP7d9jY7pSg/8znsUF8fQwKpRJZKS+8xgbzsZP+zT7CjxCbPL +xcNK0fl6pRBv+gyIM013R7J1uvZJ3W6rspVxlXOvofvwYSuLOjwsZA26RM8s7Do5 +e62Bg7PUHbbaD+C8HzbJlyXeQ++oddWPbIkxJMwhP1Uvy3wA6c7E7w/UACZvv20J +KriU33QmW/o0YpOX8xBVwgsCld+IfUIYm1S1mpU6k3oUfGIA5iyKx1XLTMhlaYUG +dTdExwxQp73Jk585qWSpaiQ05OrgYyzZ8OHA2kRTPK+54HSwRfn6senf3TakZHBi +zjy/DZmOU/a/EiR7MCGg+jS1x9GBxOE= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/client1-req.pem b/tests/integration/test_tlsv1_3/certs/client1-req.pem new file mode 100644 index 00000000000..b821609068b --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client1-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMBU0faoRrITeF4k 
+pN81p7qirX/Gc56+Cux6u7RF1O6WU9v+V5jLw8chQZ87z4QSrFiT1ZnTpwWYPwJ+ +pDk6AWEoiKuOaceOh0bjZCuxADHs+qQrye5D8GXvyFvWE2cT1pD5JNEZDSl2YHqN +s4uTGRP9BP817iRDcuvdxpanaWxfXGfehJRMiEVgKDs+RUpoW4aVNivIInrUWc4R +XXkzaJKqhpCU3jAJBV4jSD5ZnA8PUfcoAj6z6T3I6phuDfRP5ldA3br8yg0hCB7Y +5QrO5lRAgEoIuNnC+U6/AIwWPI36Rjiwg3EUwI/BIiL4AWjzkjSdr0mnzyHPRk4p +cn01T0GTpQi6tfZZpumDD3LkPuEy9svMpJ8ntqDnAsIJVjbg1S60hHesyYHoQw1H +xU0vrncxwcQkVaPLx0uGlioaLlvu83AVnWXbylZXsV/pLy6dE3H51GBFDX3Zj6nk +uJitk8/hNp440/Lve7SaKFPo5NdH+8ACWGdFdz3zxgPuhBDoxEeqj4c1FQA1ABXx +2akW3lQ5VxTAg5AYORvVhJTozosr+Kn3MlRdZjl94tnVByD8MGLLE0C4L/qXR/Il +bkOCz5LHapdC5j62ZEBwiElmMO/tMGl4ORV9tdTBrRZ9DMmKek2E8Qwzy770PGkh +p1cTzZt6UfZEympowmfjtiZfHIq1AgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +fGx/D6rNeaVO/vSUGX5q1iJKd8Gnw+/8NRgbuvCDuDOSy8LyqnLmVntj8q9FHpJM +SRH3LnylMVFZdybso2ZbhR1UDReGvHCtKICG3LLP1uWwy5nS3mkGBHFm9COyFP21 +kWOit1+106gEhg2f/NXh31HFmh+myepLjPEj5KxvnQhQfaQESsDYDZAs6/qT1mqp +A7GixOXh7hIFBJ97cU7fKby0Wtv7GqKAYQkaf26ImoGijtMPIlzvwJboJWmOYzIH +zrOHqspFkJD8YvYOwLIKdahViqXU7POL9uRn0vFyaXVcyXRq83Pz+bPSW9AFYsYG +ukSZiJs1yCINZI/Mk1vlfaZWYPIbBkJZ0Ny0vw112dIEilWAkVdsmJyV95aBddQI +Md64CYWZbV5P7/0QOX+v2ZQpWVnaV0m07K6VVuTL3bw6BQ9fcj7vaql6wl8jl/9l +nEotaZiY1f1pUUko3XzXpZEFB1lGBHupuS/Plz8pfFefN/7sOZoWn1VhD9I1A8uh +b2mg6hyQ7pe2NrHOTY1+L1xxxKKHt01kvDhws09qxRXtNsLrL8tl94i1ndLjHIwD +/VRnVU04E/VoTKaEXuETLZwOZu8pLwdiejrWEAmtsbmmcKq/Bk42wa+Wrmge2Chs +V8EOAtq91AjUcQeh7s2fV6yWweMGm1J6pdkNWckCsUs= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_tlsv1_3/certs/client2-cert.pem b/tests/integration/test_tlsv1_3/certs/client2-cert.pem new file mode 100644 index 00000000000..886cc533fcc --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client2-cert.pem @@ -0,0 +1,30 @@ +-----BEGIN CERTIFICATE----- +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRgMA0GCSqGSIb3DQEBCwUAMFIx +CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDIxODA5NDMw +OVoXDTMyMDIxNjA5NDMwOVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE +AwwHY2xpZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAOGIanwq +rZCqMT+ePwRkiQnD0gyVt5+kwkb8X+fdBJRF0kr70YfzMpKdZP4l4W6C0Jv/ysIH +usrI5pQxcFAIe/7DLW0JPkMLKgXsOtPNZPIkc7WYkq3cbzB0ZTsK8O3IYhwn0dAY +O49T//YqM3TLTFsG89B6uCEg7dQiP9hh6boic8M/WyAseOkJNfw+wYcTWhl1toKc +dLbo8ehESUtVhCOPVT602zBUYFkleqKPeHJ/gzl3/mTnqfeUBljGI2aXwOl7r6rI +D/or7wew2HZ81dTGDqB+yqUhBIVNseJPHOuKbke2E2qWVzAkRnX4b2ehsSaSknpC +KGWyLibaQyR0/Gt8Duu1XIsZKeFjCw27yogSTQ6xTUhLDF1anQyoJX9btSQZsTbD +3vtHbD1O07KSfiG0Z1p8LaR10RAFA7f3HLwwy6c9ExpGu5ED+co8aO5Xp5wysg8X +fYZYx4CaY3moQPJPDS6eOpUXd/6h27Fm34h9VdSj2p6j9JYsmTeEgb0x+JjAQyRS ++Koj/tbSbBqjbvO+FUaldRlHCHYCQTnjsSNBf7SxqE9lfgFitcgiHKSdD7QIfwNB +EK1o7L8OugC/SQtHGe3ngUGuNmHI9w6ItGuVqoJYP3Hwa6ClGmYlTRLoAj8NkBib +toxwGIspTlTzmmLXpqeZTPaA2K5eiq8O5DKvAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBALp4L1aky2jfgk18tney56sUL2Us2aHqyOz9LlowWFdNMtCKo0WKpZ1qXGfQ +92QE+zc/MEdmv3V/H1MmSr7trTq1u7E5vVVI9Lq2lNbRLDQLi1+qd9E7Kdl6Oxw/ +Ecc8oxIbg86p83HhzPfJG64m3x6S6m2c4sNrHRAO/gxxJex6ZSFfQwYJZFlcvvBX +CH70RBtBG/ggasVtwqBuuIRNJ2gAtiWG2RtyGlOjPiAg7nUQiYlXLHVOjvrKDvrI +KTjzRdEUMqKtIrNUBHSbWZlxKZ2Ddavshg/0T0reAN/u5KTDxiGaQxlVEA7xfm+j +etqjzTz7LnKuRsA+Z8UUYaV6mKYfKObDoUs/12IomRCUTQi1K8MP3fGmmk+4Xiyu ++t15EqWJzhjuT2RjCAL47X6ksdOtonX9t29l6ykCvYpK1mlzG+EhqDyMIn62TNfx +OFjWwhIFgyEUWtwkihIKtv3ZVtrJVO/j+HCUfq+6IpjYHdlpdb4OaHgBtpokOtM8 +PmTHJbP2bxmNIMAU1WTfV+e/JkdTKHJclC5DTGF48yRgdKSOTq0G1eJYh4DhlEIM +vOw2rXeWR6VSkvA5vF7HANEptl1tkT3dsKR4BXkSIO16ldWBEHMM4UeXx85GGM0k 
+TRON4FWBMi6PXX6mrmPXcUW7AyKG2JL9gNlxRgWHVK7xmZyp +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/client2-key.pem b/tests/integration/test_tlsv1_3/certs/client2-key.pem new file mode 100644 index 00000000000..462916c0670 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client2-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDhiGp8Kq2QqjE/ +nj8EZIkJw9IMlbefpMJG/F/n3QSURdJK+9GH8zKSnWT+JeFugtCb/8rCB7rKyOaU +MXBQCHv+wy1tCT5DCyoF7DrTzWTyJHO1mJKt3G8wdGU7CvDtyGIcJ9HQGDuPU//2 +KjN0y0xbBvPQerghIO3UIj/YYem6InPDP1sgLHjpCTX8PsGHE1oZdbaCnHS26PHo +RElLVYQjj1U+tNswVGBZJXqij3hyf4M5d/5k56n3lAZYxiNml8Dpe6+qyA/6K+8H +sNh2fNXUxg6gfsqlIQSFTbHiTxzrim5HthNqllcwJEZ1+G9nobEmkpJ6Qihlsi4m +2kMkdPxrfA7rtVyLGSnhYwsNu8qIEk0OsU1ISwxdWp0MqCV/W7UkGbE2w977R2w9 +TtOykn4htGdafC2kddEQBQO39xy8MMunPRMaRruRA/nKPGjuV6ecMrIPF32GWMeA +mmN5qEDyTw0unjqVF3f+oduxZt+IfVXUo9qeo/SWLJk3hIG9MfiYwEMkUviqI/7W +0mwao27zvhVGpXUZRwh2AkE547EjQX+0sahPZX4BYrXIIhyknQ+0CH8DQRCtaOy/ +DroAv0kLRxnt54FBrjZhyPcOiLRrlaqCWD9x8GugpRpmJU0S6AI/DZAYm7aMcBiL +KU5U85pi16anmUz2gNiuXoqvDuQyrwIDAQABAoICAHZuu3RuuOxB41DEGdWFsczV +7wS6zk1gKME8IGTS1GfEbpT/vd1FYaZKTtGDNOlieoehAGl5w6Zfb24ctBzjB7IV +7lHWy8JLJ4sqrQ2ySzM43yZac5QnMKBiTxJ9QV2sn5CnfG9pekVe2Af9yz2m0Hbw +pLIy72Q+NYXzYlGPwTwEgYPjTkgL8oZ1VssabWgwSl0aSng2DrhKhVXyHgcYZiaC +S0J9mKi9dkb5/ndFHfwKZ++Syp1UZhXjvp15lvd181DoqavmGTXHQmNog5NdJLDy +PJYdXu7t8sDJtwLfhpFOBXFU9MdBIZHfSr0CdAYYi710tMTM3wfgVIoEjcOkRzRx +36O66ehHfcyNsK52Z+DZ6uR4c+MOG0kzTiHQhyxjiu+3nYMGw1XdyE+k+eZDMPd3 +vTaR7kYOQvVvdOVAUuFZG9mK2p0mpofb9cFxFD0vJUqTYXxSdKUNIexR4mWQJw/h +rWOg/42GK4iLY2X6/CsDh6pTsM+HCzwmTGGkL54FvDsB2AhAhXPz/kGiBRTrh9/p +QBxacSPoqN+kF3u2qZRPEmjuimiW2AaXARbTABNSBQJIEmWzWOVdgUBVetGoN/ML +8mcYDmXhAc6F96eqPj0dX8cHfqYPguPhtzLj5V6XGym7hYQyOLBcE7tr2BcdjUfM +V6OFHsPNmsYWZ9F6zCv5AoIBAQD3M6gziCA0G0cG05ef0C3D9OVGWpHqr0yiR3MO +ZKsYbJJn4WOtWWvo8N5oqZBQ8VIoyGd1eiSIDuxXEWniFWjn57QN2nrDNTsEQPgk +HzomgFzuDZ7V4JsjJ9F2nAG5i2HoEwKNHdzfni6mhwGaapd+4GlET0jlC71p+h0X +CPsD6Jwabp6OUyT+xm8XW3mTWskBzKfq0OPbsdv8UB1dPt6jVrkjoe76TlTsWXWi +U9p9/h6kI984R9T10J61c21dokuL/KlHqb6TIQY3RcCgm2bfucmuawIq6vs1PBrK +VCvMX1BuTva9CYg/+hxm9Ky08jFWSCEEtzaORyN+4mmf4maFAoIBAQDpj1NoI7RP +mYqG9vHyXSDUUNbchpLOFKIaeh2DGk0sFmLi/obglsxOKu8K3r/EobNt+vpDTBxI +1EjPWdKuaXNYYjNjrVmPHdHPoHD8JmXzJDbZnXSylV9MVYSMNF+7BWUiPg3/QC7b +1a+ljJH/KEWFb0xrIfNPxVzyq8dyFOxcmLfRVLYlEW+fRYeaZ3QApxGi/BoYK8KN +vG8f/a8jpPwYCVa3JJ7/donEtsbxTkm66aacn8Vo2Y/tdo0nxyqC9PyBU+tV0u4w +aYtEZ28kpC9QheRx8D7WzhvsFc/KsshiB6jddjOVR6VgiUFCo+b/5PqpyZVTVrcs +tj8062A3KvyjAoIBAGRPn/eZS4gZcY8BmcuODKQx4j/UTNXw4KYRXE0A6LT2icqB +mZMkcDeMVpQeCqPt6SsHd4QiVmSnuZvzQwYtLe69BUGB4MMJ/LLTMl5mFZC+Efe/ +qy6bABkZ9VOuJr0GJGqqHCTrc0+CvudwbWQd0O/5XH4NtkTLqMcyaU+Jo2KIp5/K +N6kFcEO6fiX6RrFW665BP/p3XZ8u41fVorTN6EZb0LD26yTDWI64FpYSdN0fm4t7 +yv7ply9QwrZa6oxOaV2a345nASBvDDito2cI6IvstjyCy9RimiGWDEECOuup2deJ +T3KSRanAcnoM23Bpvz+F8XAacJb3ox2//qCUnIkCggEBAJHl2XllTF6pEFLs8giv +SjG26fFKE2yukPCvNb5O8MRIm68mxkSHjsqJoVeN/Act57MdI7ZkVgrcqTr15ljT +QJ2GgomSoS54tzbXB51Ls0XmamkYJezkyGobxbf7g42Fej6guwenJV5oJtfobs8Q +bhVDiF4oECDVrhFdYzKNhXT2ZWVbYIjZUnwQ5/t5Aorh0m+Ywgg1VcxKWLSIOR6w +ElZFhyjStIvqlXcPokjc2cvr5wtR9vRfa7wv4U9m59R0i0OSk6DCKc6OL9QkNNaT +xYasjR7rr6VpjSG2Il6BvhEWrdLh4qku30zlkKG7VzKk7Dyh0ykDM1u34NYC7tCn +hrcCggEBAO+Rnkk5eYYqGk/64+Qy5qA7djvvZ8AgihwJL3+ZUDSOxh0W+Er4NB6n +j0kI22N//D2j6hg93TNj9jI6lISfmY+TSikr/P+bQPGXl8wvekQxpjT5JhCYI93M +LXnSULuy7J1ujkMGdxEvfOTjvmD0ejtnuaGd+jM7hx4QNBbJj4VdV+r5BQOJAlfY +gk6n3RgAnu86szquWM6dObIz9BWtIcMVGlxA7yDmxjVDDHLwGpcwG+MTQRcHoeT6 
+2+b7FtVN1NFLazfgPS3bxKs5jaUB+Ibm9BD8B7THviNikqRYqwoJMWpJgdWo/lOQ +X0ueOR40kfa077G7jNfb03qOPUR1mFw= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/client2-req.pem b/tests/integration/test_tlsv1_3/certs/client2-req.pem new file mode 100644 index 00000000000..846f6db84dc --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client2-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAOGIanwqrZCqMT+e +PwRkiQnD0gyVt5+kwkb8X+fdBJRF0kr70YfzMpKdZP4l4W6C0Jv/ysIHusrI5pQx +cFAIe/7DLW0JPkMLKgXsOtPNZPIkc7WYkq3cbzB0ZTsK8O3IYhwn0dAYO49T//Yq +M3TLTFsG89B6uCEg7dQiP9hh6boic8M/WyAseOkJNfw+wYcTWhl1toKcdLbo8ehE +SUtVhCOPVT602zBUYFkleqKPeHJ/gzl3/mTnqfeUBljGI2aXwOl7r6rID/or7wew +2HZ81dTGDqB+yqUhBIVNseJPHOuKbke2E2qWVzAkRnX4b2ehsSaSknpCKGWyLiba +QyR0/Gt8Duu1XIsZKeFjCw27yogSTQ6xTUhLDF1anQyoJX9btSQZsTbD3vtHbD1O +07KSfiG0Z1p8LaR10RAFA7f3HLwwy6c9ExpGu5ED+co8aO5Xp5wysg8XfYZYx4Ca +Y3moQPJPDS6eOpUXd/6h27Fm34h9VdSj2p6j9JYsmTeEgb0x+JjAQyRS+Koj/tbS +bBqjbvO+FUaldRlHCHYCQTnjsSNBf7SxqE9lfgFitcgiHKSdD7QIfwNBEK1o7L8O +ugC/SQtHGe3ngUGuNmHI9w6ItGuVqoJYP3Hwa6ClGmYlTRLoAj8NkBibtoxwGIsp +TlTzmmLXpqeZTPaA2K5eiq8O5DKvAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +3DJlf7AkZklzzswgm487f+y2bB7IYr55JwENASDxQEOdVcdgLC3IWu3hLiFwdqac +0Sw2OHZuETwJiIX3fD+qUT6TgbsP21W7wEQ4jfKg/bsXFMbrvw/ILkOW2JLTH4Cc +9ylCN+46dQ9heATkiF/Co+uASz9IoSDdtoycA3BuKGBZI8VGa56QmJOOsMM5NgxT +RTh2r23tV4E8AGYj3HC+b1rzK1RTlsj/m5nM9Jv0/NqoV1cprS1ONr8CBhN0ttuA +WLrG+DUZTMJYFabqTptlgejQFhiFp5HT5A+eXgZ8uEUX1I3q5jq1BEWtLdmJNZ45 +QViSJOokH/+1kfRSWiAH7pdBz4URLBcsDhAag4J7kV38t7fgdaIizY8R2Ss82iEP +xqa4A0PA065wB44zng/VrPrHoH1YnGRugXEnrqgcipC0FxUl3oQjvwOSR/E7yFU0 +GIr1MpRcyrd0z4p16783qnMpE1Aa0msED2SBKIK13WcNY+CtDF/wO47ZNywl1hBo +VkM+ohPpmonaVXNGdpdoZpeGjkBUbqkn+so4aYkX/WuZ6vY2vwdV0prD1vdAFfD2 +AeJx5ypu5aeKn6nK0eMy6W/VEJx6RLCiYVOCIcssgy31rmk4iLQJP2StYVK2mZKp +5aSR4eTv1/XlMujq+ZqcuUqA1id9wP7908Xr0DzdNdA= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_tlsv1_3/certs/client3-cert.pem b/tests/integration/test_tlsv1_3/certs/client3-cert.pem new file mode 100644 index 00000000000..ce9a472cb9a --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client3-cert.pem @@ -0,0 +1,30 @@ +-----BEGIN CERTIFICATE----- +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRhMA0GCSqGSIb3DQEBCwUAMFIx +CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDIxODA5NDMw +OVoXDTMyMDIxNjA5NDMwOVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE +AwwHY2xpZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAN8Bt8gv +50J66lQ+l/NUW+lqW4DesmSLv1BnjDd5SSA8tfczt999/l1epAGeEN/Pl4dAxXP/ +cxpx+J+xF6SKNxQ0RP+PHQMiDzCUgBq4OKs09kDQ/uvycUZlQuWPtR610TWjZR5r +VrNSwJQp3VGDdNyEbKj/yd6Yi5NC1iLuqPC20fw5/9BVTm1P2wWX7nv1AWs235s2 +yAG7pLNcgPiTfSmXyyT31YBjb9Onun7gv7exI/3K9mS+aWq6ci1xAXtykVCs551T +OQmDAUxda041YghEThO4MrZa6uSZqVwnoUcXTla+8biLYb3+9CnIjM5whAOTR+9r +jpsuuXEUOsrX9Mgb1HTS+ksmrA+Eka7MdVi60Hoon09uNvcTM8CSKNgnTzcPCM6t +J4NHDiimJM5WA/eY8i3NNCTa1HUGEeIK51UOdjIFKsvzG0TCI2FM7jQLJK5S38tI +deZ98iQbguVGhoCvRotLEAwW1M2rSOu7bxAZU4QJ93IuUfkLn2BipOuyuR55Z/6F +z5Jij/1lK2/pKWhntUHTIpG+bBHDF++0LN0aB29uIwYRkoz9JUgnNz4FDVbLvJ+z +5Ywr61t8AujZdfMZDpRYlzfWPGej8pm7/Eux5jgx/3jcLtqfqkfZLSuFjBKfkUU1 +eGsC80RupMJKIeppv541W6nQJlmJYKv7DCvrAgMBAAEwDQYJKoZIhvcNAQELBQAD 
+ggIBAD+YMVntBdeq7xJEL7xU4QEHzUGhDWodGMJfmswcxe7gf5Nztcq5YIug+akL +ewg0wzgCA5YGz00J92sKDF16RmYyPfkxmrCYdNGwISjNJyEEcPEVkdAzwILjv2Lq +0shFlSsf+Zp/M4XhHeirmzz/jJ9KHlzEYoCz1WOn+UGF12KgV2oQOamJSWOMCoMh +81oy90V5IlCBqnYfZCYj7cbYLBd5jZMZ+7lsVnxttzPTg1gIoP6vrLT32Ubnzx9N +IoAeiUg7az/fbnuOkJtu0cjz9aSdpjm2h2giyVAFJ8DkQ9C92tdr9DWZKn7rDO16 +TMdv0q8NFjRGhqdmqWUG6o2cUmQsJ/ZiIcHx5X1b7j7PYSS+ae9zi1tcpHAN6kCw +WHguIf5I8MIZxE741ZMBokFSIqd6Bh1EP/TUx1+g2a/nH3ZaNd4/KKADxfUU2Y58 +UwdKeX9YpcRz+NNO+1h3NoE1a/i0dhwiBf4OzBiV0WpAjQHT95IlQxTxfHFp42IH +GrbqIS3qK5DKlNFkBBk1beKxBGKmTH+Pw6fhjkuPYQzjmGo4xluivfeT8SiBT2iO +uIGLd+sitIooom0KEjHuHS9cdZ5XEPIUDAFhmIt7Y5K8J2fs+xtYzhibg3n0Q6qh +xTx7GzhTA1HSUE/467af5J3CSfpGAjZQZo/t2/A6tCumzk9F +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/client3-key.pem b/tests/integration/test_tlsv1_3/certs/client3-key.pem new file mode 100644 index 00000000000..b7464eb2866 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client3-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQDfAbfIL+dCeupU +PpfzVFvpaluA3rJki79QZ4w3eUkgPLX3M7ffff5dXqQBnhDfz5eHQMVz/3Macfif +sRekijcUNET/jx0DIg8wlIAauDirNPZA0P7r8nFGZULlj7UetdE1o2Uea1azUsCU +Kd1Rg3TchGyo/8nemIuTQtYi7qjwttH8Of/QVU5tT9sFl+579QFrNt+bNsgBu6Sz +XID4k30pl8sk99WAY2/Tp7p+4L+3sSP9yvZkvmlqunItcQF7cpFQrOedUzkJgwFM +XWtONWIIRE4TuDK2WurkmalcJ6FHF05WvvG4i2G9/vQpyIzOcIQDk0fva46bLrlx +FDrK1/TIG9R00vpLJqwPhJGuzHVYutB6KJ9Pbjb3EzPAkijYJ083DwjOrSeDRw4o +piTOVgP3mPItzTQk2tR1BhHiCudVDnYyBSrL8xtEwiNhTO40CySuUt/LSHXmffIk +G4LlRoaAr0aLSxAMFtTNq0jru28QGVOECfdyLlH5C59gYqTrsrkeeWf+hc+SYo/9 +ZStv6SloZ7VB0yKRvmwRwxfvtCzdGgdvbiMGEZKM/SVIJzc+BQ1Wy7yfs+WMK+tb +fALo2XXzGQ6UWJc31jxno/KZu/xLseY4Mf943C7an6pH2S0rhYwSn5FFNXhrAvNE +bqTCSiHqab+eNVup0CZZiWCr+wwr6wIDAQABAoIB/0I0QFst3XnfA7H+4x1Z7e9d +o8yeUFeJJUK5eub9Grh3TY4VzICM5vbRId9ZDalj95gvom7NZ15yd1zxNhOi9LcK +zXERC4vikJ/bdix4hFpPXsvfP87MKtS7OyDriNmVIIbL+zkMpLCX4JQb2ZhZblgI ++DkztrpejxEoxmmYcI8Ft1Ep5sfyi1XoXx1J/YLPOZyarcdme/oHut2EmMUzA/VV +GvnemYOEAa7UHImOL1xZOlYd6wf9f04wC7Vx1v7PBFTu/9O04TnxqnEBStns/y11 +GbjA9k0ssI8tDxpMqZRxVtBp31jqCBpflhzRbPvca1SkZLavN6baODNZzhpqAkDX +3R4lU5C7wu4jtzydUyEsCFNdtkGKlxpZRbRZk+keUC+HeCmXPED7p9egwF6Zi8VI +oaXl1KvHZO2W5x/BV9I1taEPhmOuRR49KxkU4e+IjqaWYN1qsqYqCs/od22Rah72 +KT+thr0mdxC4lb+pvteafricUQuq/dSbEY/lva7PhPQRKVX/VxOaAxBnhA1LHVgZ +imsW8W3eOQYJbxniTrz9EblWAg4dCcupsjMDUDUyACB/E6isDtYU1J2im6p4gbqw +tXg3bRh7KruIHbPSJyrFm1uqe+v97TLhpwPHKCsxE4HiJgRzaQDRckLJQebqNp3Y +e7kLLjg6uGsjAl6OwKECggEBAP5bLGVrmBmAz8RYPnG1MQWlsFg/eIhMFCqMjT3P +swPUU2VJKC3TC3OwFLxlAr0lkXol+8L8aEvxGjHksleA+1z0lav43b1/2jKgLgI6 +Ym5BxMJa+sUJpI6K7CedJ6wf2ozbpVXazvNBZ3o2l0QbC/KpX886CZH9YJgn7N0M +TfPe9er5zmETdHGTWtA0sDI8fZ8XndKmnWG9KTQCGur6gemF8SKuzGv/BnL+BZnv +bDqSvyN8Wjk35KPNeKVW78ROxRuEdB5brryGk955hX50PRRoofW8GSmLJNKNYvIj +VRkKrDKpz8gW1C2/xa9j5tQkGRFMDAptmk+yvtmDxfZz38UCggEBAOByrXLMTcwR +bz4MYcSmEdLv2VA/bZ+y0kW0frUU5il2fyQseoFbunVbTDiXYf40uueMbOONZktM +w04CXKRaTbnS/s6SGU5VW19jv+xzwrzpB2Shm08APwgFnSw40bKCpN4ZWQbOyFVq +QIMXfA0+Go3zJz37MsSgY+mzhHp4WITobVFpdlhaLvrLPCB78uInZrFsvNN6NP+K +OIbOoTA9u+BP73THHkpQdrRJaJWowpqejz8kzQ/Xu0Xe6AG1EGVp39phKpWH9TPF +8xoxjbdIGPkzCzYO3hgz6PlnWVj8iyTxklnaUblqKkY2mOlMA00ujcdF3d3IHvaM +Xolej+XeZ+8CggEBAKeZDdzaE4Oic8RtXN/xwxZ0gYj0cYhlkNgkeqCi7dL1IepY +VQg0ypP1DwTADhjx2zTAOG7XgCWh/V+o0LaFv5sVclW5iuplhzHah9ZiAB+kaHCk +IB6a5vohoc/MZqqs5oXv6LZ0ke6JRxSpSezPYYUIg5/5Hvs6GF7J1/IjPG4XmLS2 +23zto8l+jdUpEnxXjXK5zf1SWdtgF/kz9ealH9rurd/ri7kRdn9oz+oJb6f8r8ND +GfQf1yDzr65KZXxVZt1l3llukemZR2/NZN/Y2bJL64QO6AmOrLmr/emMzHLOrH5J 
+lCbEnBR1C14xFpTsIDRchoaMh6RCJC0Q/e0Rlv0CggEAAOIysJsBS2ZeK75cvCtz +MoNjNZ+qTNClZ0TYotncNhmTUo8iRFQaHdAoMqjV5+xJOBQjcZni5zT8J9h2iOca +GzsraaDFnLtVSsDXxpSGFbxNHSZNuDfmB6AOCFiI6sz83Sr4YMB7pWpvqpRzFpJC +BIEKjIHqpz+CZS8hvGGw54UKuSFTJ/Hi8XXPXMlgIWfKTbSB4cs/XiorIsy5cbks +fiuSY8FM6zn53afUU5KAgZ9SLQt2CzPsNtAz1Z3i3KNYEEIFquUIIBYNaPL8/dW4 +03JR/vp8AVhi+Ghhv6nu2kxhKR1k6Pf0Bqa8X16/PJSMVlZ+Extwk8Pls2C97Ee9 +3QKCAQEAgjcbHKBjd7AeyNpPSzNpv81Rry5qqOc+Cxx8LtOHBl1wc5VB5FPxfbuX +MX2skvWPnokDoXcI1a1WQwdjaZUsSoqdeyPtw8pFWiNLJZkYImiP3zMCZXYUEkzk +3EXQZryWEqBYBqxlEvTyjbBmnrAwOPOUKARFi1l9JKJ4QpdELXo9Yl+w2IQEQ5N9 +jrSY7LwS/cb25rhEc6oh/89aY83HPyABh4lC9bsciXki54YIeS+y9ijN8yCRxikr +mVGfQ0Y/qcY9spAj05yr/vnlENBB5ohxwKKsemOnH93E2GFxc1dzmWCGvISjUduB +I68TOg71OfCKgfeixNgcOvQoN+xngA== +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/client3-req.pem b/tests/integration/test_tlsv1_3/certs/client3-req.pem new file mode 100644 index 00000000000..7b4445b3609 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client3-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAN8Bt8gv50J66lQ+ +l/NUW+lqW4DesmSLv1BnjDd5SSA8tfczt999/l1epAGeEN/Pl4dAxXP/cxpx+J+x +F6SKNxQ0RP+PHQMiDzCUgBq4OKs09kDQ/uvycUZlQuWPtR610TWjZR5rVrNSwJQp +3VGDdNyEbKj/yd6Yi5NC1iLuqPC20fw5/9BVTm1P2wWX7nv1AWs235s2yAG7pLNc +gPiTfSmXyyT31YBjb9Onun7gv7exI/3K9mS+aWq6ci1xAXtykVCs551TOQmDAUxd +a041YghEThO4MrZa6uSZqVwnoUcXTla+8biLYb3+9CnIjM5whAOTR+9rjpsuuXEU +OsrX9Mgb1HTS+ksmrA+Eka7MdVi60Hoon09uNvcTM8CSKNgnTzcPCM6tJ4NHDiim +JM5WA/eY8i3NNCTa1HUGEeIK51UOdjIFKsvzG0TCI2FM7jQLJK5S38tIdeZ98iQb +guVGhoCvRotLEAwW1M2rSOu7bxAZU4QJ93IuUfkLn2BipOuyuR55Z/6Fz5Jij/1l +K2/pKWhntUHTIpG+bBHDF++0LN0aB29uIwYRkoz9JUgnNz4FDVbLvJ+z5Ywr61t8 +AujZdfMZDpRYlzfWPGej8pm7/Eux5jgx/3jcLtqfqkfZLSuFjBKfkUU1eGsC80Ru +pMJKIeppv541W6nQJlmJYKv7DCvrAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +Rggrols8hXGEcWeIEGn66kY9IVTzaTUf3oMfEbdf/2Q1QzHzmqp53yamHl5ioMgX +o5UBVxthgh1VOxkvCxIzlKDJprzVFkfwwc7h9c0HGt3No/ERobHDT6YRaGukAL5g +muIGBUseyBAOIfyqc5kbCRWfPrAOttAH4gd8XMBgO8XdfHAvyXBC8Ha55O6oriX9 +IAKL5+3nVJkBle+62OmROnstbcdKyK4UtOeki/6ptYVE0d9I+NfKjuk3eKtICW8Q +Pn3IEcNEZoFG2UQ19ENWwYEZyMZJt0aunqnm7L4RYiZT5w4meeendzXSKLKR6+Ye +ULt1sDRskgKoNRzmeCVzci05BG48jv/E7Az6aV/qhGiU2qIAPMdVXncWUhR3fj+E +CL/uLifOvfC6SnKw/7qQmgjUvEe4Duvi670a5QuImpm/mAIN22cXPc+QquSdR5xy +loz/o3JJQZemPAOM0CMIHZ+cGESxH30QCBNn5HfcOf5fRZVCss4Hl6JxHR2G4yN3 +RKEIUXR03qgSK91WHl3WvqwXgmIAiUuvPjo2i7kSuaUUHilZiXK1ngIqYfUTB5SQ +O8pG0fx3fbhVDA3RQfXeJE6FA2AyLvqOcsseRzvcQjQm4MU7p+RVaY17rI6/EkS8 +ac3E7BPwnXqSAkPSEgoiezv/Z0Hkmrcu6fIsUuf4ETU= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_tlsv1_3/certs/dhparam4096.pem b/tests/integration/test_tlsv1_3/certs/dhparam4096.pem new file mode 100644 index 00000000000..102b8dcc72c --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/dhparam4096.pem @@ -0,0 +1,13 @@ +-----BEGIN DH PARAMETERS----- +MIICCAKCAgEA/yTb5We6gyTktHTlb/PimPgJhvY3Spp9zzBO4I2r/f7p/llPLj7u ++VDW8s4Z9+UUVQKoVoU2NLcgbgIUWrkAKuBCqqxxh+/+0NdP/klkWUX084HBvT5e +Tofnv2JT4EB1ynlNCF1q7frF/ELNyPzOWzh2w14XwoWxb3ojrfwG7N9p7CQbSwjH +f1lDRbOcLX+n/pic4X42KqqXqsg6ehtwORz5kMlT3DTAGC7sfB6rL8Y8/GrPmTNV +wny+UdnTyku8+OJ/xhL1ERiOGMCcP5jhIU1Bq9Uf0ayp+3fJazPAyP5iUprwd3DF +9UvaEqIFeaknq5qX+aVf8G7GpCpIC14db6uEJCH/oMSGakJdC0jWZzN6EeJoUILY +I0K/+DA34/Yh7SAehqc2rAukiquCv59/Lm+FlZyIzjQoOtKI06oIjGr7kbS4lvgF 
+NbN7AXYaou5cJaffPmfgUuU1hw9gn2kYYMb7el63BBzIKX/ptWR/uJ59h05ivYGX +J5bok81H7gYvwHaXkKdQ2t3FoFJHAekKpraiqIW7qHE4O2lb3JOU9GvAQ1QLdNNw +CKJPFKBVes+YxmncJexxvyVXj1N9XXriOG949RwpLF8d85yx3eN+3cq5XJx65Rog +OknNaTV8uTrpX/WGcVylApshMy9+4LP352ZsmXDuP7yiBqlaxyb/KLMCAQI= +-----END DH PARAMETERS----- diff --git a/tests/integration/test_tlsv1_3/certs/generate_certs.sh b/tests/integration/test_tlsv1_3/certs/generate_certs.sh new file mode 100755 index 00000000000..d6126d361f5 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/generate_certs.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# 1. Generate CA's private key and self-signed certificate +openssl req -newkey rsa:4096 -x509 -days 3650 -nodes -batch -keyout ca-key.pem -out ca-cert.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=ca" + +# 2. Generate server's private key and certificate signing request (CSR) +openssl req -newkey rsa:4096 -nodes -batch -keyout server-key.pem -out server-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=server" + +# 3. Use CA's private key to sign server's CSR and get back the signed certificate +openssl x509 -req -days 3650 -in server-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -extfile server-ext.cnf -out server-cert.pem + +# 4. Generate client's private key and certificate signing request (CSR) +openssl req -newkey rsa:4096 -nodes -batch -keyout client1-key.pem -out client1-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client1" +openssl req -newkey rsa:4096 -nodes -batch -keyout client2-key.pem -out client2-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client2" +openssl req -newkey rsa:4096 -nodes -batch -keyout client3-key.pem -out client3-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client3" + +# 5. Use CA's private key to sign client's CSR and get back the signed certificate +openssl x509 -req -days 3650 -in client1-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client1-cert.pem +openssl x509 -req -days 3650 -in client2-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client2-cert.pem +openssl x509 -req -days 3650 -in client3-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client3-cert.pem + +# 6. 
Generate one more self-signed certificate and private key for using as wrong certificate (because it's not signed by CA) +openssl req -newkey rsa:4096 -x509 -days 3650 -nodes -batch -keyout wrong-key.pem -out wrong-cert.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client" diff --git a/tests/integration/test_tlsv1_3/certs/server-cert.pem b/tests/integration/test_tlsv1_3/certs/server-cert.pem new file mode 100644 index 00000000000..6f8e5a3c6b1 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/server-cert.pem @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFSTCCAzGgAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJF4wDQYJKoZIhvcNAQEL +BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjIwMjE4 +MDk0MzA2WhcNMzIwMjE2MDk0MzA2WjBWMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQ8w +DQYDVQQDDAZzZXJ2ZXIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQC8 +jV8igQGgCvu/7BJDI5VQl43VGAFjH2Na/E9P4E5uwkSlJVED1WKvIlxRWhOaQOfC +587nZVhQtHpdbCvBdKrHml4SVbTchs5SN2kZsHeqaQzcGnejnczE0SYo4xNyniSv +GiQ1M8G3fiZNflEIPM/+Ob2oI3YnVWFGy0a5rQcHZWS45KuGILMP0aRHyzyh/31c +K3i2xA7A3V2jBNuD4kHG8TLgfDeoCecTI0iU/LJnDOolX5XdpyeoJ6YyYOGg3F9e +bRmbNlJN3Iky3Vzyc4jYG7y6f5DqfebYMW6hCvLpf9lN6/gPNOb2KjL3hvJ+hbj+ +b9EkVAzpw7mW1VHEy+WbtYMPoKy08JTc7zr1tv/vQGr3XExwlC9iixZXMaVt1kP1 +TEVHv2FiUOiZsVaqtoFpS/wBvKeQdkzNy+66pRpG9bLuOnL4hlz+rwHkdBmHGk+q +cXdwglqIDqXKlCpIMSkFPH1364KLdJ2qBgWWoWCJjUmgbrA8/LU6DX+GBbEiw45T +PQKP//RMkOrHOYRD33WTU0iKP61zn5+9RD5OLxEUOtCvL7AfB+jt4vYrMTT2U3Kl +OckWxNx55bYLdLfGKtepGV2r5xzce0UMbWQrXQRuka3a/j5VJUTuUgcwgd6FoP4N +4ObW2H1YEtE5M30xpa1kcqJ1RGEWagakISgn2Z3TywIDAQABoxMwETAPBgNVHREE +CDAGhwQKBaxNMA0GCSqGSIb3DQEBCwUAA4ICAQCE2eJVcvsMmJu6xAfoE6/u6BrD +opMicCtlC2qt0BgSIzzDQ/iWjnWKGM1C+pO+2G0WTczj7ugsxjPzhkyBpuEZaWt0 +9/tJTKIrgaRZvEe0ifsJxyqL5LJgfxK7TbDPcUBKr1v+TOxPVRq0FuG16x+yka4C +rwxfBHU43FmtEFfgu13r515F3ggXcdlojkce8ZKtTAGEcN0MpbJ6XS90BHU0sy5A +APTm0fR0vM3kg1nuBLbSGF5KfASdw13gb6QsDbll0IqK8LvXYiX5CaVfkAe/pFkO +/2iIxYW74yC2gV+DcFdRPVfFxSKrdg0tDER35OYg1/vXRjV5BWr1EjE3qjrCcUZy +rlF3fms7Arr20ka2nSa8avn4ALpyJZmKasoxNAAsxivingNVZkql48OqsJ3n0qGk +LI6Yu+UM/pc78a3NHsdsCbnf8qvae4oJa1kyiochJu+gUOzHvs4Ydti9iTQn2Byo +2A2LzyVPBmSOhzdQ7SwpvHA4A2ftao+dZoA/+o4rmBtbmgxjpBPyPJTN0ZfKlpKl +Oyi57ov+cJmZctSUbP3M11gBva7aYu1Rd7/eXeCEl1FHhmKL/Ee+UrNZLiwspb2E +Sa+pOHdJX8VgsIYXku2UKaGT2QFITxO7fnxghioxgsyCKrQ+m1gL9vgXj/gJu+48 +c+5CZ9SobLdMkVOtQQ== +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/server-ext.cnf b/tests/integration/test_tlsv1_3/certs/server-ext.cnf new file mode 100644 index 00000000000..83d9b03ccb7 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/server-ext.cnf @@ -0,0 +1 @@ +subjectAltName=IP:10.5.172.77 diff --git a/tests/integration/test_tlsv1_3/certs/server-key.pem b/tests/integration/test_tlsv1_3/certs/server-key.pem new file mode 100644 index 00000000000..065a2290749 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/server-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC8jV8igQGgCvu/ +7BJDI5VQl43VGAFjH2Na/E9P4E5uwkSlJVED1WKvIlxRWhOaQOfC587nZVhQtHpd +bCvBdKrHml4SVbTchs5SN2kZsHeqaQzcGnejnczE0SYo4xNyniSvGiQ1M8G3fiZN +flEIPM/+Ob2oI3YnVWFGy0a5rQcHZWS45KuGILMP0aRHyzyh/31cK3i2xA7A3V2j +BNuD4kHG8TLgfDeoCecTI0iU/LJnDOolX5XdpyeoJ6YyYOGg3F9ebRmbNlJN3Iky +3Vzyc4jYG7y6f5DqfebYMW6hCvLpf9lN6/gPNOb2KjL3hvJ+hbj+b9EkVAzpw7mW +1VHEy+WbtYMPoKy08JTc7zr1tv/vQGr3XExwlC9iixZXMaVt1kP1TEVHv2FiUOiZ 
+sVaqtoFpS/wBvKeQdkzNy+66pRpG9bLuOnL4hlz+rwHkdBmHGk+qcXdwglqIDqXK +lCpIMSkFPH1364KLdJ2qBgWWoWCJjUmgbrA8/LU6DX+GBbEiw45TPQKP//RMkOrH +OYRD33WTU0iKP61zn5+9RD5OLxEUOtCvL7AfB+jt4vYrMTT2U3KlOckWxNx55bYL +dLfGKtepGV2r5xzce0UMbWQrXQRuka3a/j5VJUTuUgcwgd6FoP4N4ObW2H1YEtE5 +M30xpa1kcqJ1RGEWagakISgn2Z3TywIDAQABAoICAQC11lTwLp/Fm7IL9fvquc9P +CMmkv2DfGi80WO2YJ8ccM8gFyEYoP0rLgYSshAUxlvSr1+iG6grQ0izMGfzctcnZ +c3rTjco9fthNG9kFCFVvh536SqAkr5MCIH3/onZn7DGOmNRgZoikkEkaJP66xgME +tuS72W8iIcoNfw63FDIaJOONGCJ+2Nw3HkOjZVIVHRLlp5rkD5H218Vs6MtWlgY/ +eO9K5SC7sskhgL6HyGe40BCjeFpMh97L4Wj7XslZ3A0xQGAYervHES9TWX5A58EK +QT2yUkIMktzklE+PicKYA08rQa1Z5Pf0YOAELSWBdS7iWi3FLjXB35sE5rbT5puH +9hZXSDWLggbefuaUJyowDEZy2aHP5pvXKBDhEANRbU8VaDyHhlNLqVNquE5Cn4HO +zPeH+KLFbbABUok7yYZmIC9Bfn+rXvNzTX6A13AdJI/HcKA5RBGtpAY/168Pt/Aq +dzuqepu42rFAvS45RNevp72IIavx/QdAA1OTgKxh3c2Mf85pIXJ51aWWLnn+EZ5/ +EsE0crfwkuKJvjubNC4oOwMTFMIBI2WsjvaAw8pQw0Kb0ksExKd0wz9mKcqR/v0I +K9oYsaHkx5je0NOZds385+zCoQHYaw1aKUd7ZLqr5G/Nf/2TEYpMWco4ETA8lzu3 +Ty/8XkNw8jd4p+7bUuz1mQKCAQEA4MNU7GWDPwUKNNSz335nGH2oBvSGbYiwLcRM +D+x2+RTfOAFSSJ+Q5tQ+327ZkAB5dK2mxmDYKB+Ln1UBIneViUflkMyh4fuutIXI +wYo+BL71r89MqhRvvMK9hWnCGtJTJedf2iQENJzVn4J76BvTPRYywBv9pofPOlj1 +MtwwMA4CZAmQpCUaF5NQr4nliYx+slkcKwlm+cOxeZGa8mkNgQdmCcTZkRz6qsiR +vQDEDiS1+5lCJ6nWW4L2tOPejNN//hVlbPGMaA0oiu7I7w4aSxnTlLhDgJzJwmN8 +NFYl+u5AcPq9iRtBnzfPmd87S9bg10zcIiMKxw898sU24Pa0jQKCAQEA1sG5hO3c +4API//k7NEWXsx5Ns2JE/AV1LtmBgqXkn1DAJ+b6V1nIUppTs0zspEWrae9KrsAk +z47qIbPaTLHuptLrvEXk2LVfzcK32a7fXXDOB5KkBhzlJM1J3PTRQFR9lr7qX6vr +EDc4p7p55IDEGnJdXa7x+z56QjpAZaHlzexQxvoWWoLBkDuoT389sdU7CbgTa4A+ +CR6D6qKd6H6tfmv5sPlvp+aje+ObacP9I4WyVjscWkzBHxS3n/fTLjY6OFv+o8PM +TdytN4+HZnu4MDJlF3vx9P6CbnnVCaScXDxPGcoSJPcoEQqoyxuvUQLDUQkzWF14 +02EvXW0dbgiPtwKCAQA0EUwFD2ceHD7HClc4+QFNDR71rYPOsBGQKJ8uOSs+fHVR +dgznwf9BWf3OqNFBqLp6KxgtcJXihZxEpt6Ca416pesqZh1CSpmoPC3LmAjR9KLZ +vX4XEHDqG3roAx3yNLMKXtU3pYxL2+Eo+INXu8ptpkzPcCyMfX2mGKGEzLllCHnJ +TuXxAJ9QwtG4OIuyF5fqHPaHicAPMCRW80If0fJM57fdn3p/QWVYVupcDGdel2aJ +CHHo2lFMFcStFvShTwWhiLdcS4CpQhMYTETEDFJO/4aiNyV8D9Y1b/J/9U0LGlJX +Wd66elPzXGx9StdjtD2V4rpENjXy8zb4nHMgHkapAoIBACvvtmTbxTSPka/M7a/k +DQU4Te1FTZfCBhdvqG9yQTPW8Xk4aD82vyUnLbihJEj3d/pUWpMl/GH6eywp/59x +R8IZpOD/67HqaY9PJw4CGPClA4HJHoWho7/DwDjUXXsrzgXpSUoJgi3vHkgyfn2h +Wn2OqEtiX19niNvDzyj71mgq0Nvkjm42EiPQEL8y6QxY85spbc+wjQCQnayDWIsY +X6ZdsNfkMFPJe+j8x+77ie6ai8HYlhRjX59cPbUcnrf1oDOnnpEincnQPCAB3VG6 +PhSeOtBzKy1UZJr1kgBHDTZRoF1GWi/14NybsazcHSIVzp/lofuSJAYa+/XBPSQl +3EECggEBALSLZPdg13906LEyznYnjgq+nMh88usegvU9qsBAFExClLLfr6Ak77og +boNoOwbaFn+xiz5M8BTJIPizJcm5GjYaqg58zotTtG51h6VgMri+fb/BUpVr7p7n +aSq3kXDZlrwZnmooCT+KcGx++w2N2SYSyZX1TELt/dpfuWJvph+E37PkONEEiHPF +ZtSA/f9lpfP5/nx1pLmv4ksKdXqpz3/kNqaf9zbhQLgOm/VoBHL4NVPYRylGpCJb +R68/7yvHBd2EskZoJB53TlJmwu+fC6ee1UiG6aqTULfEsiGidi6jIt56Gz52ox66 +BHL/JsJ0Be5xM3V4x4PtihQ3Dw546FY= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/server-req.pem b/tests/integration/test_tlsv1_3/certs/server-req.pem new file mode 100644 index 00000000000..be2f756cc7b --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/server-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEmzCCAoMCAQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGc2Vy +dmVyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAvI1fIoEBoAr7v+wS +QyOVUJeN1RgBYx9jWvxPT+BObsJEpSVRA9ViryJcUVoTmkDnwufO52VYULR6XWwr +wXSqx5peElW03IbOUjdpGbB3qmkM3Bp3o53MxNEmKOMTcp4krxokNTPBt34mTX5R +CDzP/jm9qCN2J1VhRstGua0HB2VkuOSrhiCzD9GkR8s8of99XCt4tsQOwN1dowTb 
+g+JBxvEy4Hw3qAnnEyNIlPyyZwzqJV+V3acnqCemMmDhoNxfXm0ZmzZSTdyJMt1c +8nOI2Bu8un+Q6n3m2DFuoQry6X/ZTev4DzTm9ioy94byfoW4/m/RJFQM6cO5ltVR +xMvlm7WDD6CstPCU3O869bb/70Bq91xMcJQvYosWVzGlbdZD9UxFR79hYlDombFW +qraBaUv8AbynkHZMzcvuuqUaRvWy7jpy+IZc/q8B5HQZhxpPqnF3cIJaiA6lypQq +SDEpBTx9d+uCi3SdqgYFlqFgiY1JoG6wPPy1Og1/hgWxIsOOUz0Cj//0TJDqxzmE +Q991k1NIij+tc5+fvUQ+Ti8RFDrQry+wHwfo7eL2KzE09lNypTnJFsTceeW2C3S3 +xirXqRldq+cc3HtFDG1kK10EbpGt2v4+VSVE7lIHMIHehaD+DeDm1th9WBLROTN9 +MaWtZHKidURhFmoGpCEoJ9md08sCAwEAAaAAMA0GCSqGSIb3DQEBCwUAA4ICAQAb +FDegAoUBz9O4JR1u68IMnGkO5nINGAPQOqf9a2BxGujnSB7Lw6SHukjkUqqgnfQ0 +x/aWOI8JVAi/ptscojgMQUDsVNsij5v+jbJE+ZAobxnTmKP0wTc2ktpf4d8UMVc8 +gyM85jLHZ8caCcuy0D97W81vgIv33dNHWtP+sfbQhX9wJ2YQTahIC8NpuQfLAOUH +EFxWil0mfN+9vRQ1C5naKtvrOPqyM0RPrWiudIJ5QjI4aSXxUCupxxnaQMoI0Y50 +MvVVT3VwWgP+hL4b+yEJFHRpE7BwCZijsLIXkXmVZoveHhiSMYen1HWIP1VMDEHP +CUtG5UQcA78CBS8qg4nyFbDU4hWClAkAt96O8Y2epJYepIoYuBBSEfrgupESMLjS +E9Hfq/H6Ac/Q3zWa320udvA+ysfS8pagkoiH9+TarrsDjhxLjg2h2bGcXKlrsP1R +mRVZwfNOl3/ZNq5HBPb9Z5WXKvcsTCQAlnHJdaSmzdyArB0guwUHg8ZZNZqCdVgL +TPsfE84yI/HlwRfuQILfGxq99p/UYFwnee5CoM/PPvaAT+9z/lykMWZA7osuBcK6 +zP8XneGmZOkmez5+YJgSC0xeaDxr2R52eQXlQEJGDbFDtQap/X+cJDGyqmGnbhSu +6XkGy0l8mAkpcurMcy3wWf6+joskZAN4Joi4ZjKsQA== +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_tlsv1_3/certs/wrong-cert.pem b/tests/integration/test_tlsv1_3/certs/wrong-cert.pem new file mode 100644 index 00000000000..ef95a73deba --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/wrong-cert.pem @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIFjTCCA3WgAwIBAgIUL2Y/QpwqqHyi43PwPeA6ygdPYK4wDQYJKoZIhvcNAQEL +BQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xpZW50MB4XDTIy +MDIxODA5NDMxMFoXDTMyMDIxNjA5NDMxMFowVjELMAkGA1UEBhMCUlUxEzARBgNV +BAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0 +ZDEPMA0GA1UEAwwGY2xpZW50MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKC +AgEAxO2PSeaiNFMRRiFXpnMw07u6EIdEc1Jx3cPvZjEUg/pdEmMYkrSxr2MeqRkl +tWH8TcIIoiWDLIcM6IU0mF6a5ULu84hFb9b20qRG3wRNb5yO86HnoyzU99t98++a +9iaY1QAt03k8wq4jRjU2k/eoVSoLT5uVP5KxiNzdS2BTHFSsxrt/xcwdgkfJouHN +p+MYUekk6qaQy5fTqTpqdkgO2v/JoYCi0whBNj205d+WnS7xfeyVSJP1OJWHRZ7K +Y+LU6hz6wHIng4s/ag7VdAk0PArWs50BmH5g2zJfvt7VeTQebaJWUtSEY05odOqt +KZteUmmhxW/2M73wGVF3WAJCnaxypsjcmMZFCpMXpwyTFrqobvC3APl6SOP+Ev1M +LxhhCIDuLFu46P55KKEKjUCsYigd1VsHjjvoajGcqlPlMsVHJc9VChsQDz6agzDP +Fb/LyYbrDTTmsI57/s1jAZyemq2SEYPApJvcdZ/ucl741jI0671EZPlip9iUQgt3 +MHlc3t53/GtF2W6GF5Fogch7c+0c2BhMupAHAXwfLABvv5X8GDyjsNlwB6ea9jeC +Hw+0rEotZzCXId3daFytGNm1jI216kXLSbvz6uz1wMGS6Hrhk87whgvQ58RMNs1K +SGDFw1WFv+QZeTO7wqcn8Y/eqF7q9RBhOpPMJMX8Sx/UXuECAwEAAaNTMFEwHQYD +VR0OBBYEFCI7Iy7tY0D4HPa9BZCZxYuJ51mZMB8GA1UdIwQYMBaAFCI7Iy7tY0D4 +HPa9BZCZxYuJ51mZMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB +AIKYtBwTp3yvUGSXorV32dnU0Hp0MOie/itgx/la6b3h2bZSoCigKmcmvMaAaNzA +pxeYSsf5wPnONpWfo9hsGrUDMT4ETnXdzA1dbidIrhJbGsY8CN217Qt3YZWNWkrz +xLwxEwAovQZqnGDvtx+tRE8i6YJO6/kca+GB7liHFvUx8zaQ6gCwfloduG8rOAeq +noeCpW/zqYQSQGK35ntQ8MTTRbi7jMOTCikvRlldS73ODQcAR7jywgBYf/i8ANtz +NoWa4KbWuqKsQKMIGOi1fMLMaNlDSzJyw6UJ2GVCcL1NxkCZi0yudfAAxWlRis9G +zLjm7YdNBiC6RVZudGhvzjlsLZpE9DgiwXqcDv3Y1dpstD5ikrNhlQo6THH1YeFy +B8vjVGZZZu4B2JEo+QWH+zFGJosD66YoaKMVuwRPwoGDQoO0Pfbpq41A4KUhB3cf +X49/rbInqwsN5MuGp4l4+T7k7Wm0Y1Qo4FXDVbFxHvvniyHUsZk9Llzf5wBLl84m +xheUGgCHSflfXuuWi76yoADHCv+Eqi4/aLJmkUewKXJlm+XYs9bdBHUI+Y10KmhA +hgcHXF56L+N4mLRwUuLxa5qwQIqNX32+piQoO9opxnVKKCptpATLE30TOMLEXBlp +J+6b1e4BIasAAEGUhTgPj/SLL0u59Bv0K5SlSn7VZ0gI +-----END CERTIFICATE----- diff 
--git a/tests/integration/test_tlsv1_3/certs/wrong-key.pem b/tests/integration/test_tlsv1_3/certs/wrong-key.pem new file mode 100644 index 00000000000..b2213cd2675 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/wrong-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDE7Y9J5qI0UxFG +IVemczDTu7oQh0RzUnHdw+9mMRSD+l0SYxiStLGvYx6pGSW1YfxNwgiiJYMshwzo +hTSYXprlQu7ziEVv1vbSpEbfBE1vnI7zoeejLNT3233z75r2JpjVAC3TeTzCriNG +NTaT96hVKgtPm5U/krGI3N1LYFMcVKzGu3/FzB2CR8mi4c2n4xhR6STqppDLl9Op +Omp2SA7a/8mhgKLTCEE2PbTl35adLvF97JVIk/U4lYdFnspj4tTqHPrAcieDiz9q +DtV0CTQ8CtaznQGYfmDbMl++3tV5NB5tolZS1IRjTmh06q0pm15SaaHFb/YzvfAZ +UXdYAkKdrHKmyNyYxkUKkxenDJMWuqhu8LcA+XpI4/4S/UwvGGEIgO4sW7jo/nko +oQqNQKxiKB3VWweOO+hqMZyqU+UyxUclz1UKGxAPPpqDMM8Vv8vJhusNNOawjnv+ +zWMBnJ6arZIRg8Ckm9x1n+5yXvjWMjTrvURk+WKn2JRCC3cweVze3nf8a0XZboYX +kWiByHtz7RzYGEy6kAcBfB8sAG+/lfwYPKOw2XAHp5r2N4IfD7SsSi1nMJch3d1o +XK0Y2bWMjbXqRctJu/Pq7PXAwZLoeuGTzvCGC9DnxEw2zUpIYMXDVYW/5Bl5M7vC +pyfxj96oXur1EGE6k8wkxfxLH9Re4QIDAQABAoICAQCjj/CAX/f/X7MsPYtQa8J1 +Sinbio42/pYmrJPNnBw/FhZxrC7/wucGFlyj9IgWZCEr8Go9SsztkeoNwn2RxJoA +q5xOV7PclX4CLIHUv/0VI8Kz5pi/NgBZMUwm7K8Xna041OI7ECqARCR2LsJ7GasN +uVMVttK6r7uXQmLnNUUydb3ffmI8xjEIQVnfWI74z60mc2+/GcOP5jXeC+/a+DSm +fudYpcAXaXbId24ls5SkTxYzEepYEtQNQFzPXXkah49yN8mpR+c74c805scxjmd9 +Kz9yhYiKwQTvaqKNpQVHmxte0gPC3lJrLPejjDtxIGOyLZw4oaqrBSpDzR9D0PTE +C+BR6VlXpVCTcAoiweuoDIxNTiJ5IbIJme3iMWxsAIJ4n10rSFFl9Cmmqbphp/6/ +XInB0X7Zyr1kBrwf+DH6DJhje5NXgGKVR9oe9jjW5v8V2tg1RrkzNU8iKBSxpvcI +x4mKhhRLYgoq/iNeYBVQrwJYktIbweVCQ5Spj7/20IrMkn3FAmMsZxGMZmLisJ9t +B0vvUkUgWxuJTsPJ2j+ytpGT0E2xIDYCpbG2EopDc8WvHcVNhagBvLC6xIjIKm7N +2zpBU2W3fPNXoToCAmaLDPYeRRpG6XaGFQAfvKUQRLBDGTfQ177qr34UBnmgvxDq +J2gA9rQm3XziLMuSlJexAQKCAQEA+yz49Ah7FFq0QffsoRb0qOJbfcmMGTRkaafb +ztto4EFSnjH2EwoSShu4DfqWw+ws1KxHlItNHHko5pVNpS4lj1OpnobW3QD7kEIV +mYKa3KowYUcCq1Gzq2RNDZqsC2BSXwx1MG0VVKYOahnu5bvzQq2Ft8W7CWBnbTbY +0Jxjs4KaOza+bH7Vfb5Yre0tlW7U5vI/YO8+YKxpxfOU9kVo8ZLQ/9r/YH8nnLa+ +Fd91+WjcUW8CTKU+Oz3lb/Vwcs6YOoAraq/wtOCqWURunBXkQtzOIn0bgBh3WEk1 +EQ+MVDHshlVVjv/rfnL571ZTT1amCJuEIwQRzLSvbso883srMQKCAQEAyLXaG3Pp +LYiRKu7Bqr5PPuqdT72UFabPpfgd5EtcFOL0xUpfRya6HyFdM25FWI8haXeg4e8N +0cIs3gMG+RRgm1xISJIZi92L0Cwj+kLFu2U5SkvAKMqZFh5q350FRi4Bp7ae4YrL +aguWLZCxhznh4D5xQGM6c8ObRfUUEMT+dnLPcj4zn9KHhoUudXjLKjPNw5v6nkbw +xtRdwANlHx/LX/d4+iwt2plDWmT+d2OLvqZcPyyghTMqV45L0p9XAXBsLnz4Zipx +7VJ8iH3jL5oaQ6YAFY+cXIrWBN0q3UYbXdkaA2ve6voioeF3KQNRmU10k7GKNRWl +pRNn62+rAR8isQKCAQAZnPVqFS9P3QwCqiCEMM4UJrkDs7jInTIcIBTnHDKuo5qk +LR4VxPImgnsbWdFj+0J7EXJfMHFVlPlZwiHf1TvZSMPEOaXRdZcxl7uSIuJd3DEA +ynf4NmWm9Zxx5bLjmhfsP1336TfCoQhZQ3m8DZV52C4Jlm1DQIRre6tSYpA8LvZB +UYzLjYeBwhZS7hu24E1vm4ZhASSQQSSsHfGzx1IzSDBt1swx7+V/MpdhrZ7fJxVI +bJSEcllNOzuZViL4Yh7d4FINGBHor/xPDA5ndkgHlXKjy7QxNM1+wEBcFATQVSL0 +c+E8qtY918Wq5VergH9/4zPvSivyfv5gwtjCT24RAoIBABP6HbJb0BqrHB/U0cvn +00Vk3rGAIgwhpUtUrcz6PzkI+enlJCSV0zKkBH3I/Pf6jw3LTWUPgSWemQ6j6H7E +K3VrMvqeKBLGw1K+Afq3yKyFP7WIYqDswV31Oxf0rgC1NY7220uBoAt3CcSRQUo/ +VZ8XN/h7p+a70mmdIhklMlqhxMoPLN48eybFfMFOe5JAw7szfDdiwjZYDti8vcTi +SkDMBeuImCvI025c3QMPEmqwbkAPdg6r8Av06tEU8PkAspPR9ntcwCgp7KE9Pm6P +fQu8qwd6WsrPOswTI2AQyUqHAFLU2sQyj13jbhPT87w5fF/y7NmpxOnwS4igfbnH +2pECggEBALO0FiJClb0GSqMXYJ+nbtRObD4AynYNVMEqYdZu5DBb6vb4T7uumTD5 +I1fKOa5SSELngUj23p2G6sVBsDyDHotGJYJwDGejHOFnEpY+J0Das0pGS40FsFC7 +qABIUaMoLKcIR9Ofcm9uu2n+koNULV2aaXj7A4OYhRCQi2PqiEx1wimdrLfGqTXn +O4rSf826ODch87vuPbfFPCaIFG28R3nByp/ZBH5QNiB3NBmc3A0tiHFnZW3cpOfW +Jm/Vu0PcNVVw32SroS2FCroR7qSWsvt61UzJtliLUiFHoUAxrXXiAxcZW1D2Hmpq +neUhT/t9hHdcMJgoxm2IITf6ip8nTnY= 
+-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/configs/ssl_config.xml b/tests/integration/test_tlsv1_3/configs/ssl_config.xml new file mode 100644 index 00000000000..9e686b55567 --- /dev/null +++ b/tests/integration/test_tlsv1_3/configs/ssl_config.xml @@ -0,0 +1,44 @@ + + + 8443 + + + + + + + + false + /etc/clickhouse-server/config.d/server-cert.pem + /etc/clickhouse-server/config.d/server-key.pem + /etc/clickhouse-server/config.d/dhparam4096.pem + /etc/clickhouse-server/config.d/ca-cert.pem + sslv2,sslv3,tlsv1,tlsv1_1,tlsv1_2 + + true + false + false + false + true + relaxed + + + false + sslv2,sslv3,tlsv1,tlsv1_1,tlsv1_2 + true + true + false + false + false + true + relaxed + + + + \ No newline at end of file diff --git a/tests/integration/test_tlsv1_3/configs/users_with_ssl_auth.xml b/tests/integration/test_tlsv1_3/configs/users_with_ssl_auth.xml new file mode 100644 index 00000000000..c41776f9e78 --- /dev/null +++ b/tests/integration/test_tlsv1_3/configs/users_with_ssl_auth.xml @@ -0,0 +1,22 @@ + + + + + + client1 + + + + + client2 + client3 + + + + + + + qwe123 + + + diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py new file mode 100644 index 00000000000..80c9c68eca7 --- /dev/null +++ b/tests/integration/test_tlsv1_3/test.py @@ -0,0 +1,236 @@ +import pytest +from helpers.cluster import ClickHouseCluster +import urllib.request, urllib.parse +import ssl +import os.path + +HTTPS_PORT = 8443 +NODE_IP = "10.5.172.77" # It's important for the node to work at this IP because 'server-cert.pem' requires that (see server-ext.cnf). +NODE_IP_WITH_HTTPS_PORT = NODE_IP + ":" + str(HTTPS_PORT) +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + ipv4_address=NODE_IP, + main_configs=[ + "configs/ssl_config.xml", + "certs/server-key.pem", + "certs/server-cert.pem", + "certs/ca-cert.pem", + "certs/dhparam4096.pem", + ], + user_configs=["configs/users_with_ssl_auth.xml"], +) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def get_ssl_context(cert_name): + context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + context.load_verify_locations(cafile=f"{SCRIPT_DIR}/certs/ca-cert.pem") + if cert_name: + context.load_cert_chain( + f"{SCRIPT_DIR}/certs/{cert_name}-cert.pem", + f"{SCRIPT_DIR}/certs/{cert_name}-key.pem", + ) + context.verify_mode = ssl.CERT_REQUIRED + context.check_hostname = True + return context + + +def execute_query_https( + query, user, enable_ssl_auth=True, cert_name=None, password=None +): + url = f"https://{NODE_IP_WITH_HTTPS_PORT}/?query={urllib.parse.quote(query)}" + request = urllib.request.Request(url) + request.add_header("X-ClickHouse-User", user) + if enable_ssl_auth: + request.add_header("X-ClickHouse-SSL-Certificate-Auth", "on") + if password: + request.add_header("X-ClickHouse-Key", password) + response = urllib.request.urlopen( + request, context=get_ssl_context(cert_name) + ).read() + return response.decode("utf-8") + + +def test_https(): + assert ( + execute_query_https("SELECT currentUser()", user="john", cert_name="client1") + == "john\n" + ) + assert ( + execute_query_https("SELECT currentUser()", user="lucy", cert_name="client2") + == "lucy\n" + ) + assert ( + execute_query_https("SELECT currentUser()", user="lucy", cert_name="client3") + == "lucy\n" + ) + + +def test_https_wrong_cert(): + # Wrong certificate: 
different user's certificate + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="john", cert_name="client2") + assert "HTTP Error 403" in str(err.value) + + # Wrong certificate: self-signed certificate. + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") + assert "unknown ca" in str(err.value) + + # No certificate. + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="john") + assert "HTTP Error 403" in str(err.value) + + # No header enabling SSL authentication. + with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="john", + enable_ssl_auth=False, + cert_name="client1", + ) + + +def test_https_non_ssl_auth(): + # Users with non-SSL authentication are allowed, in this case we can skip sending a client certificate at all (because "verificationMode" is set to "relaxed"). + # assert execute_query_https("SELECT currentUser()", user="peter", enable_ssl_auth=False) == "peter\n" + assert ( + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + ) + == "jane\n" + ) + + # But we still can send a certificate if we want. + assert ( + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="client1", + ) + == "peter\n" + ) + assert ( + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="client2", + ) + == "peter\n" + ) + assert ( + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="client3", + ) + == "peter\n" + ) + + assert ( + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="client1", + ) + == "jane\n" + ) + assert ( + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="client2", + ) + == "jane\n" + ) + assert ( + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="client3", + ) + == "jane\n" + ) + + # However if we send a certificate it must not be wrong. 
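+    # As a manual cross-check (a sketch, not part of this test), the same behaviour can be
+    # reproduced with curl, assuming the node is reachable at 10.5.172.77:8443 and the files
+    # from the certs/ directory are used:
+    #   curl --cacert certs/ca-cert.pem --cert certs/client1-cert.pem --key certs/client1-key.pem \
+    #        -H 'X-ClickHouse-User: john' -H 'X-ClickHouse-SSL-Certificate-Auth: on' \
+    #        'https://10.5.172.77:8443/?query=SELECT%20currentUser()'
+    # Substituting wrong-cert.pem/wrong-key.pem must fail during the TLS handshake, because
+    # wrong-cert.pem is self-signed and not signed by ca-cert.pem.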
+ with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="wrong", + ) + assert "unknown ca" in str(err.value) + with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="wrong", + ) + assert "unknown ca" in str(err.value) + + +def test_create_user(): + instance.query("CREATE USER emma IDENTIFIED WITH ssl_certificate CN 'client3'") + assert ( + execute_query_https("SELECT currentUser()", user="emma", cert_name="client3") + == "emma\n" + ) + assert ( + instance.query("SHOW CREATE USER emma") + == "CREATE USER emma IDENTIFIED WITH ssl_certificate CN \\'client3\\'\n" + ) + + instance.query("ALTER USER emma IDENTIFIED WITH ssl_certificate CN 'client2'") + assert ( + execute_query_https("SELECT currentUser()", user="emma", cert_name="client2") + == "emma\n" + ) + assert ( + instance.query("SHOW CREATE USER emma") + == "CREATE USER emma IDENTIFIED WITH ssl_certificate CN \\'client2\\'\n" + ) + + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="emma", cert_name="client3") + assert "HTTP Error 403" in str(err.value) + + assert ( + instance.query("SHOW CREATE USER lucy") + == "CREATE USER lucy IDENTIFIED WITH ssl_certificate CN \\'client2\\', \\'client3\\'\n" + ) + + assert ( + instance.query( + "SELECT name, auth_type, auth_params FROM system.users WHERE name IN ['emma', 'lucy'] ORDER BY name" + ) + == 'emma\tssl_certificate\t{"common_names":["client2"]}\n' + 'lucy\tssl_certificate\t{"common_names":["client2","client3"]}\n' + ) diff --git a/tests/performance/distinct_in_order.xml b/tests/performance/distinct_in_order.xml new file mode 100644 index 00000000000..834a6945622 --- /dev/null +++ b/tests/performance/distinct_in_order.xml @@ -0,0 +1,33 @@ + + + CREATE TABLE distinct_cardinality_high (high UInt64, medium UInt64, low UInt64) ENGINE MergeTree() ORDER BY (high, medium) + INSERT INTO distinct_cardinality_high SELECT number % 1e6, number % 1e4, number % 1e2 FROM numbers_mt(1e8) + + SELECT DISTINCT high FROM distinct_cardinality_high FORMAT Null + SELECT DISTINCT high, low FROM distinct_cardinality_high FORMAT Null + SELECT DISTINCT high, medium FROM distinct_cardinality_high FORMAT Null + SELECT DISTINCT high, medium, low FROM distinct_cardinality_high FORMAT Null + + SELECT DISTINCT high, medium FROM distinct_cardinality_high ORDER BY medium FORMAT Null + SELECT DISTINCT high, medium FROM distinct_cardinality_high ORDER BY high FORMAT Null + SELECT DISTINCT high, low FROM distinct_cardinality_high ORDER BY low FORMAT Null + SELECT DISTINCT high, medium, low FROM distinct_cardinality_high ORDER BY low FORMAT Null + + DROP TABLE IF EXISTS distinct_cardinality_high + + + CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium) + INSERT INTO distinct_cardinality_low SELECT number % 1e2, number % 1e4, number % 1e6 FROM numbers_mt(1e8) + + SELECT DISTINCT low FROM distinct_cardinality_low FORMAT Null + SELECT DISTINCT low, medium FROM distinct_cardinality_low FORMAT Null + SELECT DISTINCT low, high FROM distinct_cardinality_low FORMAT Null + SELECT DISTINCT low, medium, high FROM distinct_cardinality_low FORMAT Null + + SELECT DISTINCT low, medium FROM distinct_cardinality_low ORDER BY medium FORMAT Null + SELECT DISTINCT low, medium FROM distinct_cardinality_low ORDER BY low FORMAT Null + SELECT DISTINCT low, 
high FROM distinct_cardinality_low ORDER BY high FORMAT Null + SELECT DISTINCT low, medium, high FROM distinct_cardinality_low ORDER BY high FORMAT Null + + DROP TABLE IF EXISTS distinct_cardinality_low + diff --git a/tests/performance/norm_distance_float.xml b/tests/performance/norm_distance_float.xml new file mode 100644 index 00000000000..e71d8eb6281 --- /dev/null +++ b/tests/performance/norm_distance_float.xml @@ -0,0 +1,95 @@ + + + + + element_type + + Float32 + Float64 + + + + + + CREATE TABLE vecs_{element_type} ( + v Array({element_type}) + ) ENGINE=Memory; + + + + + + INSERT INTO vecs_{element_type} + SELECT v FROM ( + SELECT + number AS n, + [ + rand(n*10), + rand(n*10+1), + rand(n*10+2), + rand(n*10+3), + rand(n*10+4), + rand(n*10+5), + rand(n*10+6), + rand(n*10+7), + rand(n*10+8), + rand(n*10+9), + rand(n*10), + rand(n*10+1), + rand(n*10+2), + rand(n*10+3), + rand(n*10+4), + rand(n*10+5), + rand(n*10+6), + rand(n*10+7), + rand(n*10+8), + rand(n*10+9), + rand(n*10), + rand(n*10+1), + rand(n*10+2), + rand(n*10+3), + rand(n*10+4), + rand(n*10+5), + rand(n*10+6), + rand(n*10+7), + rand(n*10+8), + rand(n*10+9), + rand(n*10), + rand(n*10+1), + rand(n*10+2), + rand(n*10+3), + rand(n*10+4), + rand(n*10+5), + rand(n*10+6), + rand(n*10+7) + ] AS v + FROM system.numbers + LIMIT 10000000 + ); + + + + 1 + + + + + + norm + + L1 + L2 + L2Squared + Linf + + + + + + SELECT sum(dist) FROM (SELECT {norm}Norm(v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, v) AS dist FROM vecs_{element_type}) + + DROP TABLE vecs_{element_type} + + diff --git a/tests/performance/views_max_insert_threads.xml b/tests/performance/views_max_insert_threads.xml new file mode 100644 index 00000000000..2988984f5d8 --- /dev/null +++ b/tests/performance/views_max_insert_threads.xml @@ -0,0 +1,11 @@ + + + create table views_max_insert_threads_null (a UInt64) Engine = Null + create materialized view views_max_insert_threads_mv Engine = Null AS select now() as ts, max(a) from views_max_insert_threads_null group by ts + + insert into views_max_insert_threads_null select * from numbers_mt(3000000000) settings max_threads = 16, max_insert_threads=16 + + drop table if exists views_max_insert_threads_null + drop table if exists views_max_insert_threads_mv + + diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect index 5bbc77ccf14..8d23b3bef60 100755 --- a/tests/queries/0_stateless/01176_mysql_client_interactive.expect +++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect @@ -2,6 +2,10 @@ # Tags: no-fasttest # Tag no-fasttest: requires mysql client +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -13,7 +17,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$MYSQL_CLIENT_BINARY \$MYSQL_CLIENT_OPT" expect -nocase -re "mysql.*> " diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.expect b/tests/queries/0_stateless/01179_insert_values_semicolon.expect index bf937c3a6a4..9d35941ae40 100755 --- a/tests/queries/0_stateless/01179_insert_values_semicolon.expect +++ 
b/tests/queries/0_stateless/01179_insert_values_semicolon.expect @@ -1,6 +1,10 @@ #!/usr/bin/expect -f # Tags: long +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -11,7 +15,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.expect b/tests/queries/0_stateless/01180_client_syntax_errors.expect index 6e4e975988e..da3dfbec6df 100755 --- a/tests/queries/0_stateless/01180_client_syntax_errors.expect +++ b/tests/queries/0_stateless/01180_client_syntax_errors.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -10,7 +14,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index 9021ae2bb1a..a9801e3b910 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -1,9 +1,23 @@ -- { echoOn } -set parallel_view_processing=1; -insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + + + +select 'optimize_trivial_insert_select=0', 'max_insert_threads=0'; +optimize_trivial_insert_select=0 max_insert_threads=0 +insert into testX select number from numbers(10) settings + log_queries=1, + parallel_view_processing=0, + optimize_trivial_insert_select=0, + max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) >= 8 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; -1 +select arrayUniq(thread_ids) from system.query_log where + current_database = currentDatabase() and + type != 'QueryStart' and + query like '%insert into testX %' and + Settings['parallel_view_processing'] = '0' and + Settings['optimize_trivial_insert_select'] = '0' and + Settings['max_insert_threads'] = '0'; +2 select count() from testX; 10 select count() from testXA; @@ -12,11 +26,22 @@ select count() from testXB; 0 select count() from testXC; 10 -set parallel_view_processing=0; -insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +select 'optimize_trivial_insert_select=0', 'max_insert_threads=16'; +optimize_trivial_insert_select=0 max_insert_threads=16 +insert into testX select number from numbers(10) settings + log_queries=1, + parallel_view_processing=0, + optimize_trivial_insert_select=0, + max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) >= 5 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; -1 +select arrayUniq(thread_ids) from system.query_log 
where + current_database = currentDatabase() and + type != 'QueryStart' and + query like '%insert into testX %' and + Settings['parallel_view_processing'] = '0' and + Settings['optimize_trivial_insert_select'] = '0' and + Settings['max_insert_threads'] = '16'; +2 select count() from testX; 20 select count() from testXA; @@ -25,3 +50,147 @@ select count() from testXB; 0 select count() from testXC; 20 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=0'; +optimize_trivial_insert_select=1 max_insert_threads=0 +insert into testX select number from numbers(10) settings + log_queries=1, + parallel_view_processing=0, + optimize_trivial_insert_select=1, + max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +system flush logs; +select arrayUniq(thread_ids) from system.query_log where + current_database = currentDatabase() and + type != 'QueryStart' and + query like '%insert into testX %' and + Settings['parallel_view_processing'] = '0' and + Settings['optimize_trivial_insert_select'] = '1' and + Settings['max_insert_threads'] = '0'; +2 +select count() from testX; +30 +select count() from testXA; +30 +select count() from testXB; +0 +select count() from testXC; +30 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=16'; +optimize_trivial_insert_select=1 max_insert_threads=16 +insert into testX select number from numbers(10) settings + log_queries=1, + parallel_view_processing=0, + optimize_trivial_insert_select=1, + max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +system flush logs; +select arrayUniq(thread_ids) from system.query_log where + current_database = currentDatabase() and + type != 'QueryStart' and + query like '%insert into testX %' and + Settings['parallel_view_processing'] = '0' and + Settings['optimize_trivial_insert_select'] = '1' and + Settings['max_insert_threads'] = '16'; +2 +select count() from testX; +40 +select count() from testXA; +40 +select count() from testXB; +0 +select count() from testXC; +40 +select 'optimize_trivial_insert_select=0', 'max_insert_threads=0'; +optimize_trivial_insert_select=0 max_insert_threads=0 +insert into testX select number from numbers(10) settings + log_queries=1, + parallel_view_processing=1, + optimize_trivial_insert_select=0, + max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +system flush logs; +select arrayUniq(thread_ids) from system.query_log where + current_database = currentDatabase() and + type != 'QueryStart' and + query like '%insert into testX %' and + Settings['parallel_view_processing'] = '1' and + Settings['optimize_trivial_insert_select'] = '0' and + Settings['max_insert_threads'] = '0'; +5 +select count() from testX; +50 +select count() from testXA; +50 +select count() from testXB; +0 +select count() from testXC; +50 +select 'optimize_trivial_insert_select=0', 'max_insert_threads=16'; +optimize_trivial_insert_select=0 max_insert_threads=16 +insert into testX select number from numbers(10) settings + log_queries=1, + parallel_view_processing=1, + optimize_trivial_insert_select=0, + max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +system flush logs; +select arrayUniq(thread_ids) from system.query_log where + current_database = currentDatabase() and + type != 'QueryStart' and + query like '%insert into testX %' and + Settings['parallel_view_processing'] = '1' and + Settings['optimize_trivial_insert_select'] = '0' and + Settings['max_insert_threads'] = '16'; +5 +select count() from testX; +60 
+select count() from testXA; +60 +select count() from testXB; +0 +select count() from testXC; +60 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=0'; +optimize_trivial_insert_select=1 max_insert_threads=0 +insert into testX select number from numbers(10) settings + log_queries=1, + parallel_view_processing=1, + optimize_trivial_insert_select=1, + max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +system flush logs; +select arrayUniq(thread_ids) from system.query_log where + current_database = currentDatabase() and + type != 'QueryStart' and + query like '%insert into testX %' and + Settings['parallel_view_processing'] = '1' and + Settings['optimize_trivial_insert_select'] = '1' and + Settings['max_insert_threads'] = '0'; +5 +select count() from testX; +70 +select count() from testXA; +70 +select count() from testXB; +0 +select count() from testXC; +70 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=16'; +optimize_trivial_insert_select=1 max_insert_threads=16 +insert into testX select number from numbers(10) settings + log_queries=1, + parallel_view_processing=1, + optimize_trivial_insert_select=1, + max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +system flush logs; +select arrayUniq(thread_ids) from system.query_log where + current_database = currentDatabase() and + type != 'QueryStart' and + query like '%insert into testX %' and + Settings['parallel_view_processing'] = '1' and + Settings['optimize_trivial_insert_select'] = '1' and + Settings['max_insert_threads'] = '16'; +5 +select count() from testX; +80 +select count() from testXA; +80 +select count() from testXB; +0 +select count() from testXC; +80 diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql b/tests/queries/0_stateless/01275_parallel_mv.sql deleted file mode 100644 index 27b8ef96e0b..00000000000 --- a/tests/queries/0_stateless/01275_parallel_mv.sql +++ /dev/null @@ -1,39 +0,0 @@ -set max_threads = 0; - -drop table if exists testX; -drop table if exists testXA; -drop table if exists testXB; -drop table if exists testXC; - -create table testX (A Int64) engine=MergeTree order by tuple(); - -create materialized view testXA engine=MergeTree order by tuple() as select sleep(1) from testX; -create materialized view testXB engine=MergeTree order by tuple() as select sleep(2), throwIf(A=1) from testX; -create materialized view testXC engine=MergeTree order by tuple() as select sleep(1) from testX; - --- { echoOn } -set parallel_view_processing=1; -insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } -system flush logs; -select length(thread_ids) >= 8 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; - -select count() from testX; -select count() from testXA; -select count() from testXB; -select count() from testXC; - -set parallel_view_processing=0; -insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } -system flush logs; -select length(thread_ids) >= 5 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; - -select count() from testX; -select count() from testXA; -select count() from testXB; -select count() from testXC; --- { echoOff } - -drop table 
testX; -drop view testXA; -drop view testXB; -drop view testXC; diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 new file mode 100644 index 00000000000..6b17a141d3e --- /dev/null +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -0,0 +1,50 @@ +-- Tags: no-s3-storage +-- no-s3-storage: s3 has 20 more threads + +-- avoid settings randomization by clickhouse-test +set max_threads = 0; + +drop table if exists testX; +drop table if exists testXA; +drop table if exists testXB; +drop table if exists testXC; + +create table testX (A Int64) engine=MergeTree order by tuple(); + +create materialized view testXA engine=MergeTree order by tuple() as select sleep(0.1) from testX; +create materialized view testXB engine=MergeTree order by tuple() as select sleep(0.2), throwIf(A=1) from testX; +create materialized view testXC engine=MergeTree order by tuple() as select sleep(0.1) from testX; + +-- { echoOn } +{% for parallel_view_processing in [0, 1] %} +{% for optimize_trivial_insert_select in [0, 1] %} +{% for max_insert_threads in [0, 16] %} +select 'optimize_trivial_insert_select={{ optimize_trivial_insert_select }}', 'max_insert_threads={{ max_insert_threads }}'; + +insert into testX select number from numbers(10) settings + log_queries=1, + parallel_view_processing={{ parallel_view_processing }}, + optimize_trivial_insert_select={{ optimize_trivial_insert_select }}, + max_insert_threads={{ max_insert_threads }}; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +system flush logs; +select arrayUniq(thread_ids) from system.query_log where + current_database = currentDatabase() and + type != 'QueryStart' and + query like '%insert into testX %' and + Settings['parallel_view_processing'] = '{{ parallel_view_processing }}' and + Settings['optimize_trivial_insert_select'] = '{{ optimize_trivial_insert_select }}' and + Settings['max_insert_threads'] = '{{ max_insert_threads }}'; + +select count() from testX; +select count() from testXA; +select count() from testXB; +select count() from testXC; +{% endfor %} +{% endfor %} +{% endfor %} +-- { echoOff } + +drop table testX; +drop view testXA; +drop view testXB; +drop view testXC; diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect index 07bdbcdac76..bab1dd224cf 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 10 match_max 100000 @@ -14,7 +18,6 @@ expect_after { # useful debugging configuration # exp_internal 1 -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect index 085a1140753..83eced841ce 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] 
+exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -10,7 +14,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect index ad5b7625929..06a60ed95a2 100755 --- a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect +++ b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect @@ -1,6 +1,10 @@ #!/usr/bin/expect -f # Tags: long +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -10,7 +14,6 @@ expect_after { # A default timeout action is to do nothing, change it to fail timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect index 9c20b7c517e..fff0dd015e1 100755 --- a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -10,7 +14,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT" expect ":) " diff --git a/tests/queries/0_stateless/01520_client_print_query_id.expect b/tests/queries/0_stateless/01520_client_print_query_id.expect index 8b6e0e17a85..0e8f660041d 100755 --- a/tests/queries/0_stateless/01520_client_print_query_id.expect +++ b/tests/queries/0_stateless/01520_client_print_query_id.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -10,7 +14,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh b/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh index c973612c80d..0d13a1d4eff 100755 --- a/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh +++ b/tests/queries/0_stateless/01548_query_log_query_execution_ms.sh @@ -4,11 +4,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -function random_str() -{ - local n=$1 && shift - tr -cd '[:lower:]' < /dev/urandom | head -c"$n" -} function test_query_duration_ms() { local query_id diff --git a/tests/queries/0_stateless/01565_reconnect_after_client_error.expect b/tests/queries/0_stateless/01565_reconnect_after_client_error.expect index 819450ffd30..035698f524b 100755 --- a/tests/queries/0_stateless/01565_reconnect_after_client_error.expect +++ b/tests/queries/0_stateless/01565_reconnect_after_client_error.expect @@ -4,6 +4,10 @@ # This is a separate test, because we want to test the interactive mode. # https://github.com/ClickHouse/ClickHouse/issues/19353 +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -15,7 +19,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn" expect "\n:) " diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.reference b/tests/queries/0_stateless/01710_minmax_count_projection.reference index 259d320a38a..bbcec98fb74 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.reference +++ b/tests/queries/0_stateless/01710_minmax_count_projection.reference @@ -2,7 +2,9 @@ 0 9998 5000 1 9999 5000 0 9998 5000 +0 9998 5000 1 +0 9998 5000 0 1 0 9999 @@ -14,6 +16,7 @@ 1 1 1 +2021-10-25 10:00:00 3 2021-10-27 10:00:00 3 \N 2021-10-27 10:00:00 3 0 2021-10-24 10:00:00 diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index a6c04725583..7ceff6a2662 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -10,11 +10,15 @@ set max_rows_to_read = 2, allow_experimental_projection_optimization = 1; select min(i), max(i), count() from d; select min(i), max(i), count() from d group by _partition_id order by _partition_id; select min(i), max(i), count() from d where _partition_value.1 = 0 group by _partition_id order by _partition_id; +select min(i), max(i), count() from d where moduloLegacy(i, 2) = 0 group by _partition_id order by _partition_id; select min(i), max(i), count() from d where _partition_value.1 = 10 group by _partition_id order by _partition_id; -- fuzz crash select min(i) from d where 1 = _partition_value.1; +-- fuzz crash https://github.com/ClickHouse/ClickHouse/issues/37151 +SELECT min(i), max(i), count() FROM d WHERE (_partition_value.1) = 0 GROUP BY ignore(bitTest(ignore(NULL), 65535), NULL, (_partition_value.1) = 7, '10.25', bitTest(NULL, -9223372036854775808), NULL, ignore(ignore(-2147483647, NULL)), 1024), _partition_id ORDER BY _partition_id ASC NULLS FIRST; + drop table d; drop table if exists has_final_mark; @@ -54,6 +58,9 @@ select min(dt), max(dt), count() from d where toDate(dt) >= '2021-10-25'; select min(dt), max(dt), count(toDate(dt) >= '2021-10-25') from d where toDate(dt) >= '2021-10-25'; select count() from d group by toDate(dt); +-- fuzz crash +SELECT min(dt), count(ignore(ignore(ignore(tupleElement(_partition_value, NULL) = NULL), NULL, NULL, NULL), 0, '10485.76', NULL)), max(dt), count(toDate(dt) >= '2021-10-25') FROM d WHERE toDate(dt) >= '2021-10-25'; + -- fuzz crash SELECT pointInEllipses(min(j), NULL), max(dt), count('0.0000000007') FROM d WHERE toDate(dt) >= '2021-10-25'; SELECT 
min(j) FROM d PREWHERE ceil(j) <= 0; diff --git a/tests/queries/0_stateless/01710_projection_fetch_long.sql b/tests/queries/0_stateless/01710_projection_fetch_long.sql index fd12b84c817..29effc53745 100644 --- a/tests/queries/0_stateless/01710_projection_fetch_long.sql +++ b/tests/queries/0_stateless/01710_projection_fetch_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-s3-storage +-- Tags: long, no-s3-storage, no-backward-compatibility-check drop table if exists tp_1; drop table if exists tp_2; diff --git a/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh b/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh index 0cafa904a71..2c6a6ef35eb 100755 --- a/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh +++ b/tests/queries/0_stateless/01710_projections_optimize_aggregation_in_order.sh @@ -29,12 +29,6 @@ $CLICKHOUSE_CLIENT -nm -q " INSERT INTO in_order_agg_01710 SELECT 1, number%2, number%4, number FROM numbers(100000); " -function random_str() -{ - local n=$1 && shift - tr -cd '[:lower:]' < /dev/urandom | head -c"$n" -} - function run_query() { local query=$1 && shift diff --git a/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh b/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh index f66dc9ff872..5a9c480c78c 100755 --- a/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh +++ b/tests/queries/0_stateless/01710_projections_partial_optimize_aggregation_in_order.sh @@ -35,12 +35,6 @@ $CLICKHOUSE_CLIENT -nm -q " INSERT INTO in_order_agg_partial_01710 SELECT 1, number%2, number%4, number FROM numbers(100000) LIMIT 50000, 100000; " -function random_str() -{ - local n=$1 && shift - tr -cd '[:lower:]' < /dev/urandom | head -c"$n" -} - function run_query() { local query=$1 && shift diff --git a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect index 022320e2d4b..3d9c633eb44 100755 --- a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect +++ b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -10,7 +14,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/01848_http_insert_segfault.sh b/tests/queries/0_stateless/01848_http_insert_segfault.sh index c766e9794ae..1f2e9eebcdc 100755 --- a/tests/queries/0_stateless/01848_http_insert_segfault.sh +++ b/tests/queries/0_stateless/01848_http_insert_segfault.sh @@ -1,7 +1,9 @@ #!/usr/bin/env bash +# Tags: no-tsan +# Sometimes is takes longer than 60 seconds under TSan. - CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) - # shellcheck source=../shell_config.sh - . "$CUR_DIR"/../shell_config.sh +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh - ${CLICKHOUSE_LOCAL} -q "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table doesn't exist" && echo 'Ok.' || echo 'FAIL' ||: +${CLICKHOUSE_LOCAL} --query "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table doesn't exist" && echo 'Ok.' || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect index d5ce4c3cbf2..1be56675b33 100755 --- a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect +++ b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect @@ -1,6 +1,10 @@ #!/usr/bin/expect -f # Tags: long +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -10,7 +14,6 @@ expect_after { # A default timeout action is to do nothing, change it to fail timeout { exit 1 } } -set basedir [file dirname $argv0] # history file is not required, in-memory history is enough spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$basedir/01910_client_replxx_container_overflow_long.history.log" diff --git a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect index c5645179ab3..111389e49b2 100755 --- a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect +++ b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect @@ -2,6 +2,10 @@ # Tags: no-parallel # Tag no-parallel: Uses non unique history file +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -11,7 +15,6 @@ expect_after { # A default timeout action is to do nothing, change it to fail timeout { exit 1 } } -set basedir [file dirname $argv0] exec bash -c "echo select 1 > $argv0.txt" exec bash -c "echo select 1 >> $argv0.txt" diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 2f74b6e33ae..ca423ee106c 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -3,6 +3,10 @@ # This is a test for system.warnings. 
Testing in interactive mode is necessary, # as we want to see certain warnings from client +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -14,7 +18,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] set Debug_type 0 spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.expect b/tests/queries/0_stateless/02003_memory_limit_in_client.expect index a3d6d04110a..a8e8c1d5786 100755 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.expect +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.expect @@ -4,6 +4,10 @@ # This is a test for system.warnings. Testing in interactive mode is necessary, # as we want to see certain warnings from client +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -15,8 +19,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] - # # Check that the query will fail in clickhouse-client # diff --git a/tests/queries/0_stateless/02011_tuple_vector_functions.reference b/tests/queries/0_stateless/02011_tuple_vector_functions.reference index 4c5067c7da2..1b54179cc87 100644 --- a/tests/queries/0_stateless/02011_tuple_vector_functions.reference +++ b/tests/queries/0_stateless/02011_tuple_vector_functions.reference @@ -21,7 +21,9 @@ 6 7.1 1.4142135623730951 +2 13 +169 1.5 -3 2.3 @@ -34,8 +36,10 @@ 2.0000887587111964 4 2.8284271247461903 +8 1 0 +0 -4.413254828250501e-8 (0.2,-0.8) (0.6,0.8) @@ -48,6 +52,7 @@ (NULL,NULL) \N \N +\N (2,4,NULL) \N \N diff --git a/tests/queries/0_stateless/02011_tuple_vector_functions.sql b/tests/queries/0_stateless/02011_tuple_vector_functions.sql index f34fb91586c..14f013937bb 100644 --- a/tests/queries/0_stateless/02011_tuple_vector_functions.sql +++ b/tests/queries/0_stateless/02011_tuple_vector_functions.sql @@ -28,7 +28,9 @@ SELECT scalarProduct(tuple(1), tuple(0)); SELECT L1Norm((-1, 2, -3)); SELECT L1Norm((-1, 2.5, -3.6)); SELECT L2Norm((1, 1.0)); +SELECT L2SquaredNorm((1, 1.0)); SELECT L2Norm(materialize((-12, 5))); +SELECT L2SquaredNorm(materialize((-12, 5))); SELECT max2(materialize(1), 1.5); SELECT min2(-1, -3); @@ -44,8 +46,10 @@ SELECT LpNorm((-1, -2), 11.); SELECT L1Distance((1, 2, 3), (2, 3, 1)); SELECT L2Distance(materialize((1, 1)), (3, -1)); +SELECT L2SquaredDistance(materialize((1, 1)), (3, -1)); SELECT LinfDistance((1, 1), (1, 2)); SELECT L2Distance((5, 5), (5, 5)); +SELECT L2SquaredDistance((5, 5), (5, 5)); SELECT LpDistance((1800, 1900), (18, 59), 12) - LpDistance(tuple(-22), tuple(1900), 12.); SELECT L1Normalize(materialize((1, -4))); @@ -61,6 +65,7 @@ SELECT cosineDistance((1, 0), (0.5, sqrt(3) / 2)); SELECT (NULL, 1) + (1, NULL); SELECT (NULL, 1) * materialize((1, NULL)); SELECT L2Norm((NULL, 3, 4)); +SELECT L2SquaredNorm((NULL, 3, 4)); SELECT 2 * (1, 2, NULL); SELECT (1, 1.0, NULL) / NULL; SELECT (1, 1.0, NULL) / materialize(NULL); diff --git a/tests/queries/0_stateless/02047_client_exception.expect b/tests/queries/0_stateless/02047_client_exception.expect index f7d4bfb555d..50ed09d03c5 100755 --- a/tests/queries/0_stateless/02047_client_exception.expect +++ b/tests/queries/0_stateless/02047_client_exception.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname 
$argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 20 match_max 100000 @@ -11,7 +15,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect index ffa25b964db..a9905128ad5 100755 --- a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect +++ b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 20 match_max 100000 @@ -11,7 +15,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/02105_backslash_letter_commands.expect b/tests/queries/0_stateless/02105_backslash_letter_commands.expect index e67d60912fa..707a544f6bb 100755 --- a/tests/queries/0_stateless/02105_backslash_letter_commands.expect +++ b/tests/queries/0_stateless/02105_backslash_letter_commands.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 02 match_max 100000 @@ -10,7 +14,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect index 0abe25e60f4..4fd430a4a69 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect @@ -1,6 +1,10 @@ #!/usr/bin/expect -f # Tags: no-parallel +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 20 match_max 100000 @@ -12,8 +16,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] - system "$basedir/helpers/02112_prepare.sh" spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --disable_suggestion --interactive --queries-file $basedir/file_02112" expect ":) " diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect index c846464b011..a90e85d1069 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 20 match_max 100000 @@ -11,7 +15,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --query 'create table t(i Int32) 
engine=Memory; insert into t select 1'" expect ":) " diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect index c64f149a93c..34eac360132 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect @@ -1,6 +1,10 @@ #!/usr/bin/expect -f # Tags: no-parallel +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 20 match_max 100000 @@ -12,8 +16,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] - system "$basedir/helpers/02112_prepare.sh" spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --queries-file $basedir/file_02112" expect ":) " diff --git a/tests/queries/0_stateless/02112_with_fill_interval.sql b/tests/queries/0_stateless/02112_with_fill_interval.sql index 16773780515..d2416f9a84b 100644 --- a/tests/queries/0_stateless/02112_with_fill_interval.sql +++ b/tests/queries/0_stateless/02112_with_fill_interval.sql @@ -1,5 +1,3 @@ -SET max_threads = 1; - DROP TABLE IF EXISTS with_fill_date; CREATE TABLE with_fill_date (d Date, d32 Date32) ENGINE = Memory; diff --git a/tests/queries/0_stateless/02116_interactive_hello.expect b/tests/queries/0_stateless/02116_interactive_hello.expect index e659cf8703c..5fa31d33e87 100755 --- a/tests/queries/0_stateless/02116_interactive_hello.expect +++ b/tests/queries/0_stateless/02116_interactive_hello.expect @@ -1,6 +1,10 @@ #!/usr/bin/expect -f # Tags: long +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 60 match_max 100000 @@ -12,7 +16,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion" expect -re "ClickHouse client version \[\\d\]{2}.\[\\d\]{1,2}.\[\\d\]{1,2}.\[\\d\]{1,2}.\r" diff --git a/tests/queries/0_stateless/02132_client_history_navigation.expect b/tests/queries/0_stateless/02132_client_history_navigation.expect index b722a0af04c..10167fb2e97 100755 --- a/tests/queries/0_stateless/02132_client_history_navigation.expect +++ b/tests/queries/0_stateless/02132_client_history_navigation.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 3 match_max 100000 @@ -14,7 +18,6 @@ expect_after { # useful debugging configuration # exp_internal 1 -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --highlight 0" expect ":) " diff --git a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect index a77de874010..b95f85403e3 100755 --- a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect +++ b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set 
timeout 60 set uuid "" @@ -11,7 +15,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT" expect ":) " diff --git a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect index 4f006b926bd..f70b699c71f 100755 --- a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect +++ b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect @@ -1,5 +1,9 @@ #!/usr/bin/expect -f +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + log_user 0 set timeout 20 match_max 100000 @@ -9,7 +13,6 @@ expect_after { timeout { exit 1 } } -set basedir [file dirname $argv0] spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" expect ":) " diff --git a/tests/queries/0_stateless/02245_s3_schema_desc.sql b/tests/queries/0_stateless/02245_s3_schema_desc.sql index 2cd362ff233..8c12d196800 100644 --- a/tests/queries/0_stateless/02245_s3_schema_desc.sql +++ b/tests/queries/0_stateless/02245_s3_schema_desc.sql @@ -11,4 +11,4 @@ desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); -SELECT * FROM s3(decodeURLComponent(NULL), [NULL]); --{serverError 170} +SELECT * FROM s3(decodeURLComponent(NULL), [NULL]); --{serverError BAD_ARGUMENTS} diff --git a/tests/queries/0_stateless/02282_array_distance.reference b/tests/queries/0_stateless/02282_array_distance.reference index ebce2788fe9..9758da9a833 100644 --- a/tests/queries/0_stateless/02282_array_distance.reference +++ b/tests/queries/0_stateless/02282_array_distance.reference @@ -1,17 +1,22 @@ 6 3.7416573867739413 +14 3.2071843327373397 3 0.00258509695694209 \N +\N nan -0 0 0 0 +0 0 0 0 0 12 14 21 7.0710678118654755 9.16515138991168 12.12435565298214 +50 +84 +147 5.917593844525055 8.308858759453505 9.932246380845738 @@ -27,21 +32,51 @@ nan 0.020204102886728692 0.11808289631180313 0 -1 1 218.74642854227358 -1 2 1348.2117786164013 -2 1 219.28064210048274 -2 2 1347.4008312302617 -3 1 214.35251339790725 -3 2 1342.8856987845243 -1 1 218.74642854227358 -1 2 1348.2117786164013 -2 1 219.28064210048274 -2 2 1347.4008312302617 -3 1 214.35251339790725 -3 2 1342.8856987845243 -1 1 218.74642854227358 -1 2 1348.2117786164013 -2 1 219.28064210048274 -2 2 1347.4008312302617 -3 1 214.35251339790725 -3 2 1342.8856987845243 +1 1 0 0 0 0 0 0 +1 2 2031 788 981.3289733414064 1182.129011571918 1397429 0.1939823640079572 +2 1 2031 788 981.3289733414064 1182.129011571918 1397429 0.1939823640079572 +2 2 0 0 0 0 0 0 +3 3 0 0 0 0 0 0 +3 4 68 2 6.238144819822316 11.661903789690601 136 0.0010041996325123037 +4 3 68 2 6.238144819822316 11.661903789690601 136 0.0010041996325123037 +4 4 0 0 0 0 0 0 +5 5 0 0 0 0 0 0 +5 6 268 2 9.70940985211151 23.15167380558045 536 0.00007815428961455151 +6 5 268 2 9.70940985211151 23.15167380558045 536 0.00007815428961455151 +6 6 0 0 0 0 0 0 +1 1 0 0 0 0 0 0 +1 2 2031 788 992.2102 1182.129 1397429 0.19398236 +2 1 2031 788 992.2102 1182.129 1397429 0.19398236 +2 2 0 0 0 0 0 0 +3 3 0 0 0 0 0 0 +3 4 68 2 6.4792237 11.661903 136 0.0010041595 +4 3 68 2 6.4792237 11.661903 136 0.0010041595 +4 4 0 0 0 0 0 0 +5 5 0 
0 0 0 0 0 +5 6 268 2 10.23446 23.151674 536 0.00007814169 +6 5 268 2 10.23446 23.151674 536 0.00007814169 +6 6 0 0 0 0 0 0 +1 1 0 0 0 0 0 0 +1 2 2031 788 992.2102104083964 1182.129011571918 1397429 0.1939823640079572 +2 1 2031 788 992.2102104083964 1182.129011571918 1397429 0.1939823640079572 +2 2 0 0 0 0 0 0 +3 3 0 0 0 0 0 0 +3 4 68 2 6.479223602554966 11.661903789690601 136 0.0010041996325123037 +4 3 68 2 6.479223602554966 11.661903789690601 136 0.0010041996325123037 +4 4 0 0 0 0 0 0 +5 5 0 0 0 0 0 0 +5 6 268 2 10.234459893824097 23.15167380558045 536 0.00007815428961455151 +6 5 268 2 10.234459893824097 23.15167380558045 536 0.00007815428961455151 +6 6 0 0 0 0 0 0 +1 1 0 0 0 0 0 0 +1 2 2031 788 992.2102104083964 1182.129011571918 1397429 0.1939823640079572 +2 1 2031 788 992.2102104083964 1182.129011571918 1397429 0.1939823640079572 +2 2 0 0 0 0 0 0 +3 3 0 0 0 0 0 0 +3 4 68 2 6.479223602554966 11.661903789690601 136 0.0010041996325123037 +4 3 68 2 6.479223602554966 11.661903789690601 136 0.0010041996325123037 +4 4 0 0 0 0 0 0 +5 5 0 0 0 0 0 0 +5 6 268 2 10.234459893824097 23.15167380558045 536 0.00007815428961455151 +6 5 268 2 10.234459893824097 23.15167380558045 536 0.00007815428961455151 +6 6 0 0 0 0 0 0 diff --git a/tests/queries/0_stateless/02282_array_distance.sql b/tests/queries/0_stateless/02282_array_distance.sql index 75e4b0d653e..9c16071dc1f 100644 --- a/tests/queries/0_stateless/02282_array_distance.sql +++ b/tests/queries/0_stateless/02282_array_distance.sql @@ -1,10 +1,12 @@ SELECT L1Distance([0, 0, 0], [1, 2, 3]); SELECT L2Distance([1, 2, 3], [0, 0, 0]); +SELECT L2SquaredDistance([1, 2, 3], [0, 0, 0]); SELECT LpDistance([1, 2, 3], [0, 0, 0], 3.5); SELECT LinfDistance([1, 2, 3], [0, 0, 0]); SELECT cosineDistance([1, 2, 3], [3, 5, 7]); SELECT L2Distance([1, 2, 3], NULL); +SELECT L2SquaredDistance([1, 2, 3], NULL); SELECT cosineDistance([1, 2, 3], [0, 0, 0]); -- Overflows @@ -12,6 +14,7 @@ WITH CAST([-547274980, 1790553898, 1981517754, 1908431500, 1352428565, -57341255 SELECT L1Distance(a,a), L2Distance(a,a), + L2SquaredDistance(a,a), LinfDistance(a,a), cosineDistance(a, a); @@ -27,23 +30,67 @@ CREATE TABLE vec2d (id UInt64, v Array(Float64)) ENGINE = Memory; INSERT INTO vec1 VALUES (1, [3, 4, 5]), (2, [2, 4, 8]), (3, [7, 7, 7]); SELECT L1Distance(v, [0, 0, 0]) FROM vec1; SELECT L2Distance(v, [0, 0, 0]) FROM vec1; +SELECT L2SquaredDistance(v, [0, 0, 0]) FROM vec1; SELECT LpDistance(v, [0, 0, 0], 3.14) FROM vec1; SELECT LinfDistance([5, 4, 3], v) FROM vec1; SELECT cosineDistance([3, 2, 1], v) FROM vec1; SELECT LinfDistance(v, materialize([0, -2, 0])) FROM vec1; SELECT cosineDistance(v, materialize([1., 1., 1.])) FROM vec1; -INSERT INTO vec2 VALUES (1, [100, 200, 0]), (2, [888, 777, 666]); -SELECT v1.id, v2.id, L2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2 v2; +INSERT INTO vec2 VALUES (1, [100, 200, 0]), (2, [888, 777, 666]), (3, range(1, 35, 1)), (4, range(3, 37, 1)), (5, range(1, 135, 1)), (6, range(3, 137, 1)); +SELECT + v1.id, + v2.id, + L1Distance(v1.v, v2.v), + LinfDistance(v1.v, v2.v), + LpDistance(v1.v, v2.v, 3.1), + L2Distance(v1.v, v2.v), + L2SquaredDistance(v1.v, v2.v), + cosineDistance(v1.v, v2.v) +FROM vec2 v1, vec2 v2 +WHERE length(v1.v) == length(v2.v); -INSERT INTO vec2f VALUES (1, [100, 200, 0]), (2, [888, 777, 666]); -SELECT v1.id, v2.id, L2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2f v2; +INSERT INTO vec2f VALUES (1, [100, 200, 0]), (2, [888, 777, 666]), (3, range(1, 35, 1)), (4, range(3, 37, 1)), (5, range(1, 135, 1)), (6, range(3, 137, 1)); +SELECT + 
v1.id, + v2.id, + L1Distance(v1.v, v2.v), + LinfDistance(v1.v, v2.v), + LpDistance(v1.v, v2.v, 3), + L2Distance(v1.v, v2.v), + L2SquaredDistance(v1.v, v2.v), + cosineDistance(v1.v, v2.v) +FROM vec2f v1, vec2f v2 +WHERE length(v1.v) == length(v2.v); -INSERT INTO vec2d VALUES (1, [100, 200, 0]), (2, [888, 777, 666]); -SELECT v1.id, v2.id, L2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2d v2; +INSERT INTO vec2d VALUES (1, [100, 200, 0]), (2, [888, 777, 666]), (3, range(1, 35, 1)), (4, range(3, 37, 1)), (5, range(1, 135, 1)), (6, range(3, 137, 1)); +SELECT + v1.id, + v2.id, + L1Distance(v1.v, v2.v), + LinfDistance(v1.v, v2.v), + LpDistance(v1.v, v2.v, 3), + L2Distance(v1.v, v2.v), + L2SquaredDistance(v1.v, v2.v), + cosineDistance(v1.v, v2.v) +FROM vec2d v1, vec2d v2 +WHERE length(v1.v) == length(v2.v); + +SELECT + v1.id, + v2.id, + L1Distance(v1.v, v2.v), + LinfDistance(v1.v, v2.v), + LpDistance(v1.v, v2.v, 3), + L2Distance(v1.v, v2.v), + L2SquaredDistance(v1.v, v2.v), + cosineDistance(v1.v, v2.v) +FROM vec2f v1, vec2d v2 +WHERE length(v1.v) == length(v2.v); SELECT L1Distance([0, 0], [1]); -- { serverError 190 } SELECT L2Distance([1, 2], (3,4)); -- { serverError 43 } +SELECT L2SquaredDistance([1, 2], (3,4)); -- { serverError 43 } SELECT LpDistance([1, 2], [3,4]); -- { serverError 42 } SELECT LpDistance([1, 2], [3,4], -1.); -- { serverError 69 } SELECT LpDistance([1, 2], [3,4], 'aaa'); -- { serverError 43 } diff --git a/tests/queries/0_stateless/02283_array_norm.reference b/tests/queries/0_stateless/02283_array_norm.reference index ebaadee321f..ed819e11255 100644 --- a/tests/queries/0_stateless/02283_array_norm.reference +++ b/tests/queries/0_stateless/02283_array_norm.reference @@ -1,29 +1,42 @@ 6 7.0710678118654755 +50 10.882246697870885 2 -10803059573 4234902446.7343364 10803059573 4234902446.7343364 3122003357.3280888 2096941042 -1 7 5 4.601724723020627 4 -2 2 2 2 2 -3 9 5.196152422706632 4.506432087111623 3 -4 0 0 0 0 +10803059573 4234902446.7343364 17934398733356468000 10803059573 4234902446.7343364 3122003357.3280888 2096941042 +1 7 5 25 4.601724723020627 4 +2 2 2 4 2 2 +3 9 5.196152422706632 27 4.506432087111623 3 +4 0 0 0 0 0 +5 330 78.16648898345122 6110 54.82161001608108 26 +6 5250 599.12436104702 358950 350.73959029428204 102 1 11 2 11 3 11 4 11 -1 7 5 4.601724723020627 4 -2 2 2 2 2 -3 9 5.196152422706632 4.506432087111623 3 -4 0 0 0 0 +5 11 +6 11 +1 7 5 25 4.6017246 4 +2 2 2 4 2 2 +3 9 5.196152 27 4.506432 3 +4 0 0 0 0 0 +5 330 78.16649 6110 54.82161 26 +6 5250 599.1244 358950 350.7396 102 1 11 2 11 3 11 4 11 -1 7 5 4.601724723020627 4 -2 2 2 2 2 -3 9 5.196152422706632 4.506432087111623 3 -4 0 0 0 0 +5 11 +6 11 +1 7 5 25 4.601724723020627 4 +2 2 2 4 2 2 +3 9 5.196152422706632 27 4.506432087111623 3 +4 0 0 0 0 0 +5 330 78.16648898345122 6110 54.82161001608108 26 +6 5250 599.12436104702 358950 350.73959029428204 102 1 11 2 11 3 11 4 11 +5 11 +6 11 diff --git a/tests/queries/0_stateless/02283_array_norm.sql b/tests/queries/0_stateless/02283_array_norm.sql index 6938618d633..dcb5288a1ac 100644 --- a/tests/queries/0_stateless/02283_array_norm.sql +++ b/tests/queries/0_stateless/02283_array_norm.sql @@ -1,5 +1,6 @@ SELECT L1Norm([1, 2, 3]); SELECT L2Norm([3., 4., 5.]); +SELECT L2SquaredNorm([3., 4., 5.]); SELECT LpNorm([3., 4., 5.], 1.1); SELECT LinfNorm([0, 0, 2]); @@ -8,6 +9,7 @@ WITH CAST([-547274980, 1790553898, 1981517754, 1908431500, 1352428565, -57341255 SELECT L1Norm(a), L2Norm(a), + L2SquaredNorm(a), LpNorm(a,1), LpNorm(a,2), LpNorm(a,3.14), @@ -19,17 +21,17 @@ DROP TABLE IF 
EXISTS vec1d; CREATE TABLE vec1 (id UInt64, v Array(UInt8)) ENGINE = Memory; CREATE TABLE vec1f (id UInt64, v Array(Float32)) ENGINE = Memory; CREATE TABLE vec1d (id UInt64, v Array(Float64)) ENGINE = Memory; -INSERT INTO vec1 VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); -INSERT INTO vec1f VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); -INSERT INTO vec1d VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); +INSERT INTO vec1 VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL), (5, range(7, 27)), (6, range(3, 103)); +INSERT INTO vec1f VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL), (5, range(7, 27)), (6, range(3, 103)); +INSERT INTO vec1d VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL), (5, range(7, 27)), (6, range(3, 103)); -SELECT id, L1Norm(v), L2Norm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1; +SELECT id, L1Norm(v), L2Norm(v), L2SquaredNorm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1; SELECT id, L1Norm(materialize([5., 6.])) FROM vec1; -SELECT id, L1Norm(v), L2Norm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1f; +SELECT id, L1Norm(v), L2Norm(v), L2SquaredNorm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1f; SELECT id, L1Norm(materialize([5., 6.])) FROM vec1f; -SELECT id, L1Norm(v), L2Norm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1d; +SELECT id, L1Norm(v), L2Norm(v), L2SquaredNorm(v), LpNorm(v, 2.7), LinfNorm(v) FROM vec1d; SELECT id, L1Norm(materialize([5., 6.])) FROM vec1d; SELECT L1Norm(1, 2); -- { serverError 42 } diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference new file mode 100644 index 00000000000..b53b561137e --- /dev/null +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.reference @@ -0,0 +1,79 @@ +-- enable distinct in order optimization +-- create table with only primary key columns +-- the same values in every chunk, pre-distinct should skip entire chunks with the same key as previous one +0 +-- create table with only primary key columns +-- pre-distinct should skip part of chunk since it contains values from previous one +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +-- create table with not only primary key columns +-- distinct with primary key prefix only +0 +-- distinct with primary key prefix only, order by sorted column +0 +-- distinct with primary key prefix only, order by sorted column desc +0 +-- distinct with full key, order by sorted column +0 0 +0 1 +0 2 +0 3 +0 4 +-- distinct with full key, order by sorted column desc +0 4 +0 3 +0 2 +0 1 +0 0 +-- distinct with key prefix and non-sorted column, order by non-sorted +0 0 +0 1 +0 2 +0 3 +0 4 +0 5 +0 6 +0 7 +0 8 +0 9 +-- distinct with key prefix and non-sorted column, order by non-sorted desc +0 9 +0 8 +0 7 +0 6 +0 5 +0 4 +0 3 +0 2 +0 1 +0 0 +-- distinct with non-key prefix and non-sorted column, order by non-sorted +0 0 +1 1 +2 2 +3 3 +4 4 +0 5 +1 6 +2 7 +3 8 +4 9 +-- distinct with non-key prefix and non-sorted column, order by non-sorted desc +4 9 +3 8 +2 7 +1 6 +0 5 +4 4 +3 3 +2 2 +1 1 +0 0 diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql new file mode 100644 index 00000000000..afe53a95b26 --- /dev/null +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql @@ -0,0 +1,46 @@ +select '-- enable distinct in order optimization'; +set optimize_distinct_in_order=1; +select '-- create table with only primary key columns'; +drop table if exists distinct_in_order 
sync; +create table distinct_in_order (a int) engine=MergeTree() order by a settings index_granularity=10; +select '-- the same values in every chunk, pre-distinct should skip entire chunks with the same key as previous one'; +insert into distinct_in_order (a) select * from zeros(10); +insert into distinct_in_order (a) select * from zeros(10); -- this entire chunk should be skipped in pre-distinct +select distinct * from distinct_in_order settings max_block_size=10, max_threads=1; + +select '-- create table with only primary key columns'; +select '-- pre-distinct should skip part of chunk since it contains values from previous one'; +drop table if exists distinct_in_order sync; +create table distinct_in_order (a int) engine=MergeTree() order by a settings index_granularity=10; +insert into distinct_in_order (a) select * from zeros(10); +insert into distinct_in_order select * from numbers(10); -- first row (0) from this chunk should be skipped in pre-distinct +select distinct a from distinct_in_order settings max_block_size=10, max_threads=1; + +select '-- create table with not only primary key columns'; +drop table if exists distinct_in_order sync; +create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b); +insert into distinct_in_order select number % number, number % 5, number % 10 from numbers(1,1000000); + +select '-- distinct with primary key prefix only'; +select distinct a from distinct_in_order; +select '-- distinct with primary key prefix only, order by sorted column'; +select distinct a from distinct_in_order order by a; +select '-- distinct with primary key prefix only, order by sorted column desc'; +select distinct a from distinct_in_order order by a desc; + +select '-- distinct with full key, order by sorted column'; +select distinct a,b from distinct_in_order order by b; +select '-- distinct with full key, order by sorted column desc'; +select distinct a,b from distinct_in_order order by b desc; + +select '-- distinct with key prefix and non-sorted column, order by non-sorted'; +select distinct a,c from distinct_in_order order by c; +select '-- distinct with key prefix and non-sorted column, order by non-sorted desc'; +select distinct a,c from distinct_in_order order by c desc; + +select '-- distinct with non-key prefix and non-sorted column, order by non-sorted'; +select distinct b,c from distinct_in_order order by c; +select '-- distinct with non-key prefix and non-sorted column, order by non-sorted desc'; +select distinct b,c from distinct_in_order order by c desc; + +drop table if exists distinct_in_order sync; diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference new file mode 100644 index 00000000000..2dac69edc41 --- /dev/null +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.reference @@ -0,0 +1,19 @@ +-- disable optimize_distinct_in_order +-- distinct all primary key columns -> no optimizations +No optimizations +-- enable optimize_distinct_in_order +-- distinct with all primary key columns -> pre-distinct optimization only +DistinctSortedChunkTransform +-- distinct with primary key prefix -> pre-distinct optimization only +DistinctSortedChunkTransform +-- distinct with primary key prefix and order by on column in distinct -> pre-distinct and final distinct optimization +DistinctSortedTransform +DistinctSortedChunkTransform +-- distinct with primary key prefix and order by on column _not_ 
in distinct -> pre-distinct optimization only +DistinctSortedChunkTransform +-- distinct with non-primary key prefix -> no optimizations +No optimizations +-- distinct with non-primary key prefix and order by on column in distinct -> final distinct optimization only +DistinctSortedTransform +-- distinct with non-primary key prefix and order by on column _not_ in distinct -> no optimizations +No optimizations diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh new file mode 100755 index 00000000000..21f50a147ac --- /dev/null +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +[ ! -z "$CLICKHOUSE_CLIENT_REDEFINED" ] && CLICKHOUSE_CLIENT=$CLICKHOUSE_CLIENT_REDEFINED + +DISABLE_OPTIMIZATION="set optimize_distinct_in_order=0" +ENABLE_OPTIMIZATION="set optimize_distinct_in_order=1" +GREP_OPTIMIZATIONS="grep 'DistinctSortedChunkTransform\|DistinctSortedTransform'" +TRIM_LEADING_SPACES="sed -e 's/^[ \t]*//'" +FIND_OPTIMIZATIONS="$GREP_OPTIMIZATIONS | $TRIM_LEADING_SPACES" + +$CLICKHOUSE_CLIENT -q "drop table if exists distinct_in_order_explain sync" +$CLICKHOUSE_CLIENT -q "create table distinct_in_order_explain (a int, b int, c int) engine=MergeTree() order by (a, b, c)" +$CLICKHOUSE_CLIENT -q "insert into distinct_in_order_explain select number % number, number % 5, number % 10 from numbers(1,10)" + +$CLICKHOUSE_CLIENT -q "select '-- disable optimize_distinct_in_order'" +$CLICKHOUSE_CLIENT -q "select '-- distinct all primary key columns -> no optimizations'" +$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $GREP_OPTIMIZATIONS || echo "No optimizations" + +$CLICKHOUSE_CLIENT -q "select '-- enable optimize_distinct_in_order'" +$CLICKHOUSE_CLIENT -q "select '-- distinct with all primary key columns -> pre-distinct optimization only'" +$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $FIND_OPTIMIZATIONS + +$CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix -> pre-distinct optimization only'" +$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_OPTIMIZATIONS + +$CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by on column in distinct -> pre-distinct and final distinct optimization'" +$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by c" | eval $FIND_OPTIMIZATIONS + +$CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by on column _not_ in distinct -> pre-distinct optimization only'" +$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by b" | eval $FIND_OPTIMIZATIONS + +$CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix -> no optimizations'" +$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain" | eval $GREP_OPTIMIZATIONS || echo "No optimizations" + +$CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by on column in distinct -> final distinct optimization only'" 
+$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by b" | eval $FIND_OPTIMIZATIONS + +$CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by on column _not_ in distinct -> no optimizations'" +$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by a" | eval $GREP_OPTIMIZATIONS || echo "No optimizations" diff --git a/tests/queries/0_stateless/02322_sql_insert_format.reference b/tests/queries/0_stateless/02322_sql_insert_format.reference new file mode 100644 index 00000000000..e64ef587fa7 --- /dev/null +++ b/tests/queries/0_stateless/02322_sql_insert_format.reference @@ -0,0 +1,23 @@ +INSERT INTO table (`x`, `y`, `z`) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (0, 0, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (1, 1, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (2, 2, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (3, 0, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (4, 1, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (2, 2, 'Hello'), (3, 0, 'Hello'); +INSERT INTO table (`x`, `y`, `z`) VALUES (4, 1, 'Hello'); +INSERT INTO table VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +REPLACE INTO table (`x`, `y`, `z`) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +INSERT INTO test (`x`, `y`, `z`) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +INSERT INTO test (x, y, z) VALUES (0, 0, 'Hello'), (1, 1, 'Hello'), (2, 2, 'Hello'), (3, 0, 'Hello'), (4, 1, 'Hello'); +0 0 Hello +1 1 Hello +2 2 Hello +3 0 Hello +4 1 Hello +0 0 Hello +1 1 Hello +2 2 Hello +3 0 Hello +4 1 Hello diff --git a/tests/queries/0_stateless/02322_sql_insert_format.sql b/tests/queries/0_stateless/02322_sql_insert_format.sql new file mode 100644 index 00000000000..34cde1e56b6 --- /dev/null +++ b/tests/queries/0_stateless/02322_sql_insert_format.sql @@ -0,0 +1,13 @@ +-- Tags: no-parallel + +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_max_batch_size=1; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_max_batch_size=2; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_include_column_names=0; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_use_replace=1; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_table_name='test'; +select number as x, number % 3 as y, 'Hello' as z from numbers(5) format SQLInsert settings output_format_sql_insert_table_name='test', output_format_sql_insert_quote_names=0; +insert into function file(02322_data.sql, 'SQLInsert') select number as x, number % 3 as y, 'Hello' as z from numbers(5) settings output_format_sql_insert_max_batch_size=2, output_format_sql_insert_quote_names=0, engine_file_truncate_on_insert=1; +select * from file(02322_data.sql, 'MySQLDump'); +insert into function 
file(02322_data.sql, 'SQLInsert') select number, number % 3, 'Hello' from numbers(5) settings output_format_sql_insert_max_batch_size=2, engine_file_truncate_on_insert=1; +select * from file(02322_data.sql, 'MySQLDump'); diff --git a/tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.reference b/tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.reference new file mode 100644 index 00000000000..c094c553f81 --- /dev/null +++ b/tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.reference @@ -0,0 +1,2 @@ +12 +12 diff --git a/tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.sql b/tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.sql new file mode 100644 index 00000000000..09ab591f98a --- /dev/null +++ b/tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.sql @@ -0,0 +1,10 @@ +CREATE TABLE nested_name_tuples +( + `a` Tuple(x String, y Tuple(i Int32, j String)) +) +ENGINE = Memory; + +INSERT INTO nested_name_tuples VALUES(('asd', (12, 'ddd'))); + +SELECT t.a.y.i FROM nested_name_tuples as t; +SELECT nested_name_tuples.a.y.i FROM nested_name_tuples as t; diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh index 29b3b7b3d9d..4f3baa1f660 100755 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.sh @@ -4,12 +4,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -function random_str() -{ - local n=$1 && shift - tr -cd '[:lower:]' < /dev/urandom | head -c"$n" -} - function check_refcnt_for_table() { local table=$1 && shift diff --git a/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.reference b/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.reference new file mode 100644 index 00000000000..2b70bdc272e --- /dev/null +++ b/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.reference @@ -0,0 +1,12 @@ +6 +6 +6 +6 +6 +6 +7 +7 +7 +7 +7 +7 diff --git a/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.sql b/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.sql new file mode 100644 index 00000000000..6208baf41c4 --- /dev/null +++ b/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.sql @@ -0,0 +1,24 @@ +drop table if exists tab; + +create table tab (id UInt32, haystack String, pattern String) engine = MergeTree() order by id; +insert into tab values (1, 'aaaxxxaa\0xxx', 'x'); + +select countSubstrings('aaaxxxaa\0xxx', pattern) from tab where id = 1; +select countSubstringsCaseInsensitive('aaaxxxaa\0xxx', pattern) from tab where id = 1; +select countSubstringsCaseInsensitiveUTF8('aaaxxxaa\0xxx', pattern) from tab where id = 1; + +select countSubstrings(haystack, pattern) from tab where id = 1; +select countSubstringsCaseInsensitive(haystack, pattern) from tab where id = 1; +select countSubstringsCaseInsensitiveUTF8(haystack, pattern) from tab where id = 1; + +insert into tab values (2, 'aaaaa\0x', 'x'); + +select position('aaaaa\0x', pattern) from tab where id = 2; +select positionCaseInsensitive('aaaaa\0x', pattern) from tab where id = 2; +select positionCaseInsensitiveUTF8('aaaaa\0x', pattern) from tab where id = 2; + +select position(haystack, pattern) from tab where id = 2; +select positionCaseInsensitive(haystack, pattern) from tab where id = 2; 
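-- A minimal standalone sketch of the behaviour these variants exercise, assuming the same
-- literals as in the INSERTs above: a zero byte inside the haystack must not terminate the
-- scan, so matches on both sides of '\0' are counted and positions past it are reported.
select countSubstrings('aaaxxxaa\0xxx', 'x'); -- 6: three 'x' before and three after the zero byte
select position('aaaaa\0x', 'x');             -- 7: the 'x' after the zero byte is still found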
+select positionCaseInsensitiveUTF8(haystack, pattern) from tab where id = 2; + +drop table if exists tab; diff --git a/tests/queries/0_stateless/02347_rank_corr_nan.reference b/tests/queries/0_stateless/02347_rank_corr_nan.reference new file mode 100644 index 00000000000..6db331af725 --- /dev/null +++ b/tests/queries/0_stateless/02347_rank_corr_nan.reference @@ -0,0 +1 @@ +nan diff --git a/tests/queries/0_stateless/02347_rank_corr_nan.sql b/tests/queries/0_stateless/02347_rank_corr_nan.sql new file mode 100644 index 00000000000..0fd755259e6 --- /dev/null +++ b/tests/queries/0_stateless/02347_rank_corr_nan.sql @@ -0,0 +1 @@ +SELECT rankCorr(number, nan) FROM numbers(10); diff --git a/tests/queries/0_stateless/02350_views_max_insert_threads.reference b/tests/queries/0_stateless/02350_views_max_insert_threads.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02350_views_max_insert_threads.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02350_views_max_insert_threads.sql b/tests/queries/0_stateless/02350_views_max_insert_threads.sql new file mode 100644 index 00000000000..e19ad465b49 --- /dev/null +++ b/tests/queries/0_stateless/02350_views_max_insert_threads.sql @@ -0,0 +1,15 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/37900 + +drop table if exists t; +drop table if exists t_mv; +create table t (a UInt64) Engine = Null; +create materialized view t_mv Engine = Null AS select now() as ts, max(a) from t group by ts; + +insert into t select * from numbers_mt(10e6) settings max_threads = 16, max_insert_threads=16; +system flush logs; + +select arrayUniq(thread_ids)>=16 from system.query_log where + event_date >= yesterday() and + current_database = currentDatabase() and + type = 'QueryFinish' and + startsWith(query, 'insert'); diff --git a/tests/queries/0_stateless/02351_Map_combinator_dist.reference b/tests/queries/0_stateless/02351_Map_combinator_dist.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/02351_Map_combinator_dist.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02351_Map_combinator_dist.sql b/tests/queries/0_stateless/02351_Map_combinator_dist.sql new file mode 100644 index 00000000000..30816061338 --- /dev/null +++ b/tests/queries/0_stateless/02351_Map_combinator_dist.sql @@ -0,0 +1,83 @@ +-- Tags: no-backward-compatibility-check:22.6 + +-- https://github.com/ClickHouse/ClickHouse/issues/35359 + +-- sumMap +SELECT x[67] +FROM +( + SELECT + A, + sumMap(CAST(arrayMap(x -> (x, 1), r), 'Map(UInt8,Int64)')) AS x + FROM remote('127.{1,1}', view( + SELECT + number AS A, + range(150) AS r + FROM numbers(60) + WHERE (A % 2) = shardNum() + )) + GROUP BY A + LIMIT 100000000 +) +WHERE A = 53 +SETTINGS prefer_localhost_replica = 0, distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 0, group_by_two_level_threshold_bytes = 0; + +-- minMap +SELECT x[0] +FROM +( + SELECT + A, + minMap(CAST(arrayMap(x -> (x, 1), r), 'Map(UInt8,Int64)')) AS x + FROM remote('127.{1,1}', view( + SELECT + number AS A, + range(150) AS r + FROM numbers(60) + WHERE (A % 2) = shardNum() + )) + GROUP BY A + LIMIT 100000000 +) +WHERE A = 41 +SETTINGS prefer_localhost_replica = 0, distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 0, group_by_two_level_threshold_bytes = 0; + +-- maxMap +SELECT x[0] +FROM +( + SELECT + A, + maxMap(CAST(arrayMap(x -> (x, 1), r), 'Map(UInt8,Int64)')) AS x + FROM 
remote('127.{1,1}', view( + SELECT + number AS A, + range(150) AS r + FROM numbers(60) + WHERE (A % 2) = shardNum() + )) + GROUP BY A + LIMIT 100000000 +) +WHERE A = 41 +SETTINGS prefer_localhost_replica = 0, distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 0, group_by_two_level_threshold_bytes = 0; + +-- avgMap +SELECT x[0] +FROM +( + SELECT + A, + avgMap(CAST(arrayMap(x -> (x, 1), r), 'Map(UInt8,Int64)')) AS x + FROM remote('127.{1,1}', view( + SELECT + number AS A, + range(150) AS r + FROM numbers(60) + WHERE (A % 2) = shardNum() + )) + GROUP BY A + LIMIT 100000000 +) +WHERE A = 41 +SETTINGS prefer_localhost_replica = 0, distributed_aggregation_memory_efficient = 1, group_by_two_level_threshold = 0, group_by_two_level_threshold_bytes = 0; diff --git a/tests/queries/0_stateless/02352_interactive_queries_from_file.expect b/tests/queries/0_stateless/02352_interactive_queries_from_file.expect new file mode 100755 index 00000000000..d15b804b0b9 --- /dev/null +++ b/tests/queries/0_stateless/02352_interactive_queries_from_file.expect @@ -0,0 +1,45 @@ +#!/usr/bin/expect -f +# tags: long, no-parallel + +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + +log_user 0 +set timeout 20 +match_max 100000 + +expect_after { + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } +} + +spawn bash -c "echo 'select 1;\nselect 2;\nselect 3' > queries_02352" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --disable_suggestion" +expect ":) " + +send -- "\\i queries_02352\r" +expect "1" +expect "2" +expect "3" +expect ":) " +send -- "\\i queries_02352;\r" +expect "1" +expect "2" +expect "3" +expect ":) " +send -- " \\i queries_02352 ; \r" +expect "1" +expect "2" +expect "3" +expect ":) " +send -- " \\i queries_02352 ; \r" +expect "1" +expect "2" +expect "3" +expect ":) " + +send -- "exit\r" +expect eof diff --git a/tests/queries/0_stateless/02352_interactive_queries_from_file.reference b/tests/queries/0_stateless/02352_interactive_queries_from_file.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 866fba506e4..ab5d5ddc1b6 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -143,3 +143,9 @@ function wait_for_queries_to_finish() fi done } + +function random_str() +{ + local n=$1 && shift + tr -cd '[:lower:]' < /dev/urandom | head -c"$n" +} diff --git a/tests/stress b/tests/stress deleted file mode 100755 index 1aad49250c2..00000000000 --- a/tests/stress +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -# https://stackoverflow.com/questions/360201/how-do-i-kill-background-processes-jobs-when-my-shell-script-exits -trap 'kill -9 $(jobs -p)' EXIT - -function thread() -{ - while true; do - ./clickhouse-test --client-option="query-fuzzer-runs=10" --order random 2>&1 | awk '/^\w+:/ { printf("\033[0;%s%sm \033[0m", ('$1' % 2 ? 
"4" : "10"), (int('$1' / 2) % 8)) }' - done -} - -# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout -export -f thread; - -NUM_THREADS=${1:-"16"} -TIMEOUT=${2:-"300"} - -for i in $(seq 1 $NUM_THREADS); do - timeout $TIMEOUT bash -c "thread $i" 2> /dev/null & -done - -wait diff --git a/tools/clickhouse-diagnostics/main.go b/tools/clickhouse-diagnostics/main.go deleted file mode 100644 index 6187ed364ef..00000000000 --- a/tools/clickhouse-diagnostics/main.go +++ /dev/null @@ -1,9 +0,0 @@ -package main - -import ( - "github.com/ClickHouse/clickhouse-diagnostics/cmd" -) - -func main() { - cmd.Execute() -} diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2bfa98b80c7..cc22b712c62 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -20,6 +20,7 @@ CamelCase CapnProto CentOS ClickHouse +ClickHouse's Config Contrib Ctrl @@ -121,6 +122,7 @@ SATA SERIALIZABLE SIMD SMALLINT +SQLInsert SQLSTATE SSSE Schemas @@ -411,6 +413,7 @@ simdjson skippingerrors sparsehash sql +sqlinsert src stacktraces statbox diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 84ce7ae5742..adae3068dcd 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -314,6 +314,12 @@ for test_case in "${tests_with_event_time_date[@]}"; do } done +expect_tests=( $(find $ROOT_PATH/tests/queries -name '*.expect') ) +for test_case in "${expect_tests[@]}"; do + pattern="^exp_internal -f \$env(CLICKHOUSE_TMP)/\$basename.debuglog 0$" + grep -q "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'" +done + # Conflict markers find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files" diff --git a/utils/check-style/codespell-ignore-lines.list b/utils/check-style/codespell-ignore-lines.list index 7c2959e9468..78b0639989f 100644 --- a/utils/check-style/codespell-ignore-lines.list +++ b/utils/check-style/codespell-ignore-lines.list @@ -4,3 +4,4 @@ The TRE regular expression implementation (src/regex/reg* and src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{ uint64_t time_to_wait = nanoseconds * timebase_info.denom / timebase_info.numer; + REPLACE_ME diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index d3a8323b81f..5d1d0f8a491 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -62,7 +62,7 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa promise->set_value(); }; - zookeeper.list(path, list_callback, nullptr); + zookeeper.list(path, ListRequestType::ALL, list_callback, nullptr); future.get(); while (!children.empty())