diff --git a/.clang-format b/.clang-format index c8b9672dc7d..d8f273702c8 100644 --- a/.clang-format +++ b/.clang-format @@ -12,6 +12,7 @@ BraceWrapping: AfterUnion: true BeforeCatch: true BeforeElse: true + BeforeLambdaBody: true IndentBraces: false BreakConstructorInitializersBeforeComma: false Cpp11BracedListStyle: true diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 00000000000..2602b9c28d5 --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,73 @@ +name: NightlyBuilds + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + +"on": + schedule: + - cron: '0 0 * * *' + +jobs: + DockerHubPushAarch64: + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix aarch64 --all + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_images_check.py --suffix amd64 --all + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/changed_images.json diff --git a/.potato.yml b/.potato.yml deleted file mode 100644 index 7cb87c58bd1..00000000000 --- a/.potato.yml +++ /dev/null @@ -1,27 +0,0 @@ -# This is the configuration file with settings for Potato. -# Potato is an internal Yandex technology that allows us to sync internal [Yandex.Tracker](https://yandex.com/tracker/) and GitHub. - -# For all PRs where documentation is needed, just add a 'pr-feature' label and we will include it into documentation sprints. - -# The project name. -name: clickhouse -# Object handlers defines which handlers we use. -handlers: - # The handler for creating an Yandex.Tracker issue. - - name: issue-create - params: - triggers: - # The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker. - github:pullRequest:labeled: - data: - # The Yandex.Tracker queue to create the issue in. 
Each issue in Tracker belongs to one of the project queues. - queue: CLICKHOUSEDOCS - # The issue title. - summary: '[Potato] Pull Request #{{pullRequest.number}}' - # The issue description. - description: > - {{pullRequest.description}} - - Ссылка на Pull Request: {{pullRequest.webUrl}} - # The condition for creating the Yandex.Tracker issue. - condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index c35009ba10a..529d7f0c4e3 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -127,11 +127,6 @@ endif() if(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) - if(CMAKE_SYSTEM_PROCESSOR MATCHES arm) - add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE) - # no debug info for IOS, that will make our library big - add_definitions(-DNDEBUG) - endif() elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") add_definitions(-DOS_LINUX) elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index 3e3cfc38218..00000000000 --- a/docker-compose.yml +++ /dev/null @@ -1,15 +0,0 @@ -version: "2" - -services: - builder: - image: clickhouse/clickhouse-builder - build: docker/builder - client: - image: clickhouse/clickhouse-client - build: docker/client - command: ['--host', 'server'] - server: - image: clickhouse/clickhouse-server - build: docker/server - ports: - - 8123:8123 diff --git a/docker/images.json b/docker/images.json index 354bdaa8728..01284d4de69 100644 --- a/docker/images.json +++ b/docker/images.json @@ -32,6 +32,7 @@ "dependent": [] }, "docker/test/pvs": { + "only_amd64": true, "name": "clickhouse/pvs-test", "dependent": [] }, @@ -72,6 +73,7 @@ "dependent": [] }, "docker/test/integration/runner": { + "only_amd64": true, "name": "clickhouse/integration-tests-runner", "dependent": [] }, @@ -124,6 +126,7 @@ "dependent": [] }, "docker/test/integration/kerberos_kdc": { + "only_amd64": true, "name": "clickhouse/kerberos-kdc", "dependent": [] }, @@ -137,6 +140,7 @@ ] }, "docker/test/integration/kerberized_hadoop": { + "only_amd64": true, "name": "clickhouse/kerberized-hadoop", "dependent": [] }, diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 1ebaed752a6..e18c07bf2c1 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -185,15 +185,14 @@ handle SIGUSR2 nostop noprint pass handle SIG$RTMIN nostop noprint pass info signals continue +gcore backtrace full -info locals +thread apply all backtrace full info registers disassemble /s up -info locals disassemble /s up -info locals disassemble /s p \"done\" detach @@ -314,6 +313,11 @@ quit || echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \ | tail -1 > description.txt fi + + if test -f core.*; then + pigz core.* + mv core.*.gz core.gz + fi } case "$stage" in @@ -345,6 +349,10 @@ case "$stage" in time fuzz ;& "report") +CORE_LINK='' +if [ -f core.gz ]; then + CORE_LINK='core.gz' +fi cat > report.html < @@ -386,6 +394,7 @@ th { cursor: pointer; } fuzzer.log server.log main.log +${CORE_LINK}

diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 025f4b27fde..e42d115999a 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -20,4 +20,4 @@ RUN cd /tmp && \ cd commons-daemon-1.0.15-src/src/native/unix && \ ./configure && \ make && \ - cp ./jsvc /usr/local/hadoop/sbin + cp ./jsvc /usr/local/hadoop-2.7.0/sbin diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 1aad2ae6770..22dd2e14456 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -58,9 +58,7 @@ RUN apt-get update \ RUN dockerd --version; docker --version -ARG TARGETARCH -# FIXME: psycopg2-binary is not available for aarch64, we skip it for now -RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \ +RUN python3 -m pip install \ PyMySQL \ aerospike==4.0.0 \ avro==1.10.2 \ @@ -90,7 +88,7 @@ RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \ urllib3 \ requests-kerberos \ pyhdfs \ - azure-storage-blob ) + azure-storage-blob COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index eddaf969f33..fb47ed0cefa 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/performance-comparison . -FROM ubuntu:18.04 +FROM ubuntu:20.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index f484feecfd0..01cc7c97548 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -4,11 +4,7 @@ ARG FROM_TAG=latest FROM clickhouse/binary-builder:$FROM_TAG -# PVS studio doesn't support aarch64/arm64, so there is a check for it everywhere -# We'll produce an empty image for arm64 -ARG TARGETARCH - -RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \ +RUN apt-get update --yes \ && apt-get install \ bash \ wget \ @@ -21,7 +17,7 @@ RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \ libprotoc-dev \ libgrpc++-dev \ libc-ares-dev \ - --yes --no-install-recommends ) + --yes --no-install-recommends #RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add - #RUN sudo wget -nv -O /etc/apt/sources.list.d/viva64.list http://files.viva64.com/etc/viva64.list @@ -33,7 +29,7 @@ RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \ ENV PKG_VERSION="pvs-studio-latest" -RUN test x$TARGETARCH = xarm64 || ( set -x \ +RUN set -x \ && export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \ && wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \ && echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \ @@ -41,7 +37,7 @@ RUN test x$TARGETARCH = xarm64 || ( set -x \ && wget -nv "https://files.viva64.com/${PKG_VERSION}.deb" \ && { debsig-verify ${PKG_VERSION}.deb \ || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \ - && dpkg -i "${PKG_VERSION}.deb" ) + && dpkg -i "${PKG_VERSION}.deb" ENV CCACHE_DIR=/test_output/ccache diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 4387d16ea7c..e57dbc38ded 100755 --- 
a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -148,14 +148,12 @@ info signals continue gcore backtrace full -info locals +thread apply all backtrace full info registers disassemble /s up -info locals disassemble /s up -info locals disassemble /s p \"done\" detach @@ -269,5 +267,5 @@ clickhouse-local --structure "test String, res String" -q "SELECT 'failure', tes # Default filename is 'core.PROCESS_ID' for core in core.*; do pigz $core - mv $core.gz /output/ + mv $core.gz /test_output/ done diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index d15f237587b..fbff6fd5e97 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -43,24 +43,27 @@ RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 20.10.6 -RUN set -eux; \ - \ -# this "case" statement is generated via "update.sh" - \ - if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \ - echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \ - exit 1; \ - fi; \ - \ - tar --extract \ +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH + +# Install docker +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && set -eux \ + && if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \ + echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \ + && exit 1; \ + fi \ + && tar --extract \ --file docker.tgz \ --strip-components 1 \ --directory /usr/local/bin/ \ - ; \ - rm docker.tgz; \ - \ - dockerd --version; \ - docker --version + && rm docker.tgz \ + && dockerd --version \ + && docker --version COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 6769f48a466..92865c94475 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -886,3 +886,12 @@ S3 disk can be configured as `main` or `cold` storage: ``` In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule. + +## Virtual Columns {#virtual-columns} + +- `_part` — Name of a part. +- `_part_index` — Sequential index of the part in the query result. +- `_partition_id` — Name of a partition. +- `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`). +- `_partition_value` — Values (a tuple) of a `partition by` expression. +- `_sample_factor` — Sample factor (from the query). diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index faa1026b919..4d2454298f2 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -209,6 +209,8 @@ When querying a `Distributed` table, `SELECT` queries are sent to all shards and When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. 
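The virtual columns added to the MergeTree documentation above can be selected like ordinary columns. A minimal sketch, assuming a hypothetical MergeTree table `my_mergetree_table`:

```sql
-- Inspect how rows are spread across parts and partitions
SELECT
    _part,
    _partition_id,
    count() AS rows
FROM my_mergetree_table
GROUP BY _part, _partition_id
ORDER BY _part;
```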
For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas). +To learn more about how distibuted `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation. + ## Virtual Columns {#virtual-columns} - `_shard_num` — Contains the `shard_num` value from the table `system.clusters`. Type: [UInt32](../../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 04f035206b5..26d928085ce 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -7,18 +7,29 @@ toc_title: URL Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine. -Syntax: `URL(URL, Format)` +Syntax: `URL(URL [,Format] [,CompressionMethod])` + +- The `URL` parameter must conform to the structure of a Uniform Resource Locator. The specified URL must point to a server that uses HTTP or HTTPS. This does not require any additional headers for getting a response from the server. + +- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats). + +- `CompressionMethod` indicates that whether the HTTP body should be compressed. If the compression is enabled, the HTTP packets sent by the URL engine contain 'Content-Encoding' header to indicate which compression method is used. + +To enable compression, please first make sure the remote HTTP endpoint indicated by the `URL` parameter supports corresponding compression algorithm. + +The supported `CompressionMethod` should be one of following: +- gzip or gz +- deflate +- brotli or br +- lzma or xz +- zstd or zst +- lz4 +- bz2 +- snappy +- none ## Usage {#using-the-engine-in-the-clickhouse-server} -The `format` must be one that ClickHouse can use in -`SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see -[Formats](../../../interfaces/formats.md#formats). - -The `URL` must conform to the structure of a Uniform Resource Locator. The specified URL must point to a server -that uses HTTP or HTTPS. This does not require any -additional headers for getting a response from the server. - `INSERT` and `SELECT` queries are transformed to `POST` and `GET` requests, respectively. For processing `POST` requests, the remote server must support [Chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding). diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 8a0fd618d32..986d5eadd80 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2304,7 +2304,7 @@ Possible values: - 1 — Enabled. - 0 — Disabled. -Default value: `0`. +Default value: `1`. ## output_format_parallel_formatting {#output-format-parallel-formatting} @@ -2315,7 +2315,7 @@ Possible values: - 1 — Enabled. - 0 — Disabled. -Default value: `0`. +Default value: `1`. 
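With parallel parsing and parallel formatting now enabled by default, they can still be switched off for a session or a single query when narrowing down format-related issues. A minimal sketch using only built-in objects:

```sql
-- Turn the parallel input/output pipelines off for the current session
SET input_format_parallel_parsing = 0;
SET output_format_parallel_formatting = 0;

-- Or override just one query
SELECT number FROM system.numbers LIMIT 5
SETTINGS output_format_parallel_formatting = 0;
```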
## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing} diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 29de9ee4b70..42307093dda 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -120,7 +120,7 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32 Get the first available contact method for the customer from the contact list: ``` sql -SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook; +SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook; ``` ``` text diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 8502fcdcf66..96bceb8958c 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -22,7 +22,7 @@ tuple(x, y, …) ## tupleElement {#tupleelement} A function that allows getting a column from a tuple. -‘N’ is the column index, starting from 1. N must be a constant. ‘N’ must be a constant. ‘N’ must be a strict postive integer no greater than the size of the tuple. +‘N’ is the column index, starting from 1. ‘N’ must be a constant. ‘N’ must be a strict postive integer no greater than the size of the tuple. There is no cost to execute the function. The function implements the operator `x.N`. diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index 3d8d2673468..d8468370f3e 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -216,6 +216,17 @@ This is more optimal than using the normal IN. However, keep the following point It also makes sense to specify a local table in the `GLOBAL IN` clause, in case this local table is only available on the requestor server and you want to use data from it on remote servers. +### Distributed Subqueries and max_rows_in_set + +You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is tranferred during distributed queries. + +This is specially important if the `global in` query returns a large amount of data. Consider the following sql - +```sql +select * from table1 where col1 global in (select col1 from table2 where ) +``` + +If `some_predicate` is not selective enough, it will return large amount of data and cause performance issues. In such cases, it is wise to limit the data transfer over the network. Also, note that [`set_overflow_mode`](../../operations/settings/query-complexity.md#set_overflow_mode) is set to `throw` (by default) meaning that an exception is raised when these thresholds are met. + ### Distributed Subqueries and max_parallel_replicas {#max_parallel_replica-subqueries} When max_parallel_replicas is greater than 1, distributed queries are further transformed. 
For example, the following: diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 2e562e20467..6bb63ea06a6 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -197,12 +197,13 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL; ## MATERIALIZE COLUMN {#materialize-column} -Materializes the column in the parts where the column is missing. This is useful in case of creating a new column with complicated `DEFAULT` or `MATERIALIZED` expression. Calculation of the column directly on `SELECT` query can cause bigger request execution time, so it is reasonable to use `MATERIALIZE COLUMN` for such columns. To perform same manipulation for existing column, use `FINAL` modifier. +Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`). +It is used if it is necessary to add or update a column with a complicated expression, because evaluating such an expression directly on `SELECT` executing turns out to be expensive. Syntax: ```sql -ALTER TABLE table MATERIALIZE COLUMN col [FINAL]; +ALTER TABLE table MATERIALIZE COLUMN col; ``` **Example** @@ -211,20 +212,34 @@ ALTER TABLE table MATERIALIZE COLUMN col [FINAL]; DROP TABLE IF EXISTS tmp; SET mutations_sync = 2; CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple(); -INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10; +INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5; ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x); ALTER TABLE tmp MATERIALIZE COLUMN s; +SELECT groupArray(x), groupArray(s) FROM (select x,s from tmp order by x); + +┌─groupArray(x)─┬─groupArray(s)─────────┐ +│ [0,1,2,3,4] │ ['0','1','2','3','4'] │ +└───────────────┴───────────────────────┘ + +ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(round(100/x)); + +INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5,5; + SELECT groupArray(x), groupArray(s) FROM tmp; -``` -**Result:** +┌─groupArray(x)─────────┬─groupArray(s)──────────────────────────────────┐ +│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','20','17','14','12','11'] │ +└───────────────────────┴────────────────────────────────────────────────┘ -```sql -┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────┐ -│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','5','6','7','8','9'] │ -└───────────────────────┴───────────────────────────────────────────┘ +ALTER TABLE tmp MATERIALIZE COLUMN s; + +SELECT groupArray(x), groupArray(s) FROM tmp; + +┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────────────────┐ +│ [0,1,2,3,4,5,6,7,8,9] │ ['inf','100','50','33','25','20','17','14','12','11'] │ +└───────────────────────┴───────────────────────────────────────────────────────┘ ``` **See Also** diff --git a/docs/en/whats-new/roadmap.md b/docs/en/whats-new/roadmap.md index 8872c42818f..54f8f9d68a3 100644 --- a/docs/en/whats-new/roadmap.md +++ b/docs/en/whats-new/roadmap.md @@ -5,6 +5,6 @@ toc_title: Roadmap # Roadmap {#roadmap} -The roadmap for the year 2021 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623). +The roadmap for the year 2022 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513). 
{## [Original article](https://clickhouse.com/docs/en/roadmap/) ##} diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 4448372c522..3f140f85396 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -872,3 +872,13 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' ``` Если диск сконфигурирован как `cold`, данные будут переноситься в S3 при срабатывании правил TTL или когда свободное место на локальном диске станет меньше порогового значения, которое определяется как `move_factor * disk_size`. + +## Виртуальные столбцы {#virtual-columns} + +- `_part` — Имя куска. +- `_part_index` — Номер куска по порядку в результате запроса. +- `_partition_id` — Имя партиции. +- `_part_uuid` — Уникальный идентификатор куска (если включена MergeTree настройка `assign_part_uuids`). +- `_partition_value` — Значения (кортеж) выражения `partition by`. +- `_sample_factor` — Коэффициент сэмплирования (из запроса). + diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index affa90d9840..ba5fc63331a 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2119,7 +2119,7 @@ ClickHouse генерирует исключение: - 1 — включен режим параллельного разбора. - 0 — отключен режим параллельного разбора. -Значение по умолчанию: `0`. +Значение по умолчанию: `1`. ## output_format_parallel_formatting {#output-format-parallel-formatting} @@ -2130,7 +2130,7 @@ ClickHouse генерирует исключение: - 1 — включен режим параллельного форматирования. - 0 — отключен режим параллельного форматирования. -Значение по умолчанию: `0`. +Значение по умолчанию: `1`. ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing} diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index 4de2d067cce..fea4c00ac05 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -197,12 +197,13 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL; ## MATERIALIZE COLUMN {#materialize-column} -Материализует столбец таблицы в кусках, в которых отсутствуют значения. Используется, если необходимо создать новый столбец со сложным материализованным выражением или выражением для заполнения по умолчанию (`DEFAULT`), потому как вычисление такого столбца прямо во время выполнения запроса `SELECT` оказывается ощутимо затратным. Чтобы совершить ту же операцию для существующего столбца, используйте модификатор `FINAL`. +Материализует или обновляет столбец таблицы с выражением для значения по умолчанию (`DEFAULT` или `MATERIALIZED`). +Используется, если необходимо добавить или обновить столбец со сложным выражением, потому как вычисление такого выражения прямо во время выполнения запроса `SELECT` оказывается ощутимо затратным. 
Синтаксис: ```sql -ALTER TABLE table MATERIALIZE COLUMN col [FINAL]; +ALTER TABLE table MATERIALIZE COLUMN col; ``` **Пример** @@ -211,21 +212,39 @@ ALTER TABLE table MATERIALIZE COLUMN col [FINAL]; DROP TABLE IF EXISTS tmp; SET mutations_sync = 2; CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple(); -INSERT INTO tmp SELECT * FROM system.numbers LIMIT 10; +INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5; ALTER TABLE tmp ADD COLUMN s String MATERIALIZED toString(x); ALTER TABLE tmp MATERIALIZE COLUMN s; +SELECT groupArray(x), groupArray(s) FROM (select x,s from tmp order by x); + +┌─groupArray(x)─┬─groupArray(s)─────────┐ +│ [0,1,2,3,4] │ ['0','1','2','3','4'] │ +└───────────────┴───────────────────────┘ + +ALTER TABLE tmp MODIFY COLUMN s String MATERIALIZED toString(round(100/x)); + +INSERT INTO tmp SELECT * FROM system.numbers LIMIT 5,5; + SELECT groupArray(x), groupArray(s) FROM tmp; + +┌─groupArray(x)─────────┬─groupArray(s)──────────────────────────────────┐ +│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','20','17','14','12','11'] │ +└───────────────────────┴────────────────────────────────────────────────┘ + +ALTER TABLE tmp MATERIALIZE COLUMN s; + +SELECT groupArray(x), groupArray(s) FROM tmp; + +┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────────────────┐ +│ [0,1,2,3,4,5,6,7,8,9] │ ['inf','100','50','33','25','20','17','14','12','11'] │ +└───────────────────────┴───────────────────────────────────────────────────────┘ ``` -**Результат:** +**Смотрите также** -```sql -┌─groupArray(x)─────────┬─groupArray(s)─────────────────────────────┐ -│ [0,1,2,3,4,5,6,7,8,9] │ ['0','1','2','3','4','5','6','7','8','9'] │ -└───────────────────────┴───────────────────────────────────────────┘ -``` +- [MATERIALIZED](../../statements/create/table.md#materialized). ## Ограничения запроса ALTER {#ogranicheniia-zaprosa-alter} diff --git a/docs/zh/engines/database-engines/index.md b/docs/zh/engines/database-engines/index.md index e4647da154d..0d844365fbb 100644 --- a/docs/zh/engines/database-engines/index.md +++ b/docs/zh/engines/database-engines/index.md @@ -14,7 +14,7 @@ toc_title: Introduction - [MySQL](../../engines/database-engines/mysql.md) -- [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) +- [MaterializeMySQL](../../engines/database-engines/materialized-mysql.md) - [Lazy](../../engines/database-engines/lazy.md) diff --git a/docs/zh/engines/database-engines/materialized-mysql.md b/docs/zh/engines/database-engines/materialized-mysql.md deleted file mode 120000 index 02118b85df4..00000000000 --- a/docs/zh/engines/database-engines/materialized-mysql.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/engines/database-engines/materialized-mysql.md \ No newline at end of file diff --git a/docs/zh/engines/database-engines/materialized-mysql.md b/docs/zh/engines/database-engines/materialized-mysql.md new file mode 100644 index 00000000000..f654013494a --- /dev/null +++ b/docs/zh/engines/database-engines/materialized-mysql.md @@ -0,0 +1,274 @@ +--- +toc_priority: 29 +toc_title: MaterializedMySQL +--- + +# [experimental] MaterializedMySQL {#materialized-mysql} + +!!! warning "警告" + 这是一个实验性的特性,不应该在生产中使用. + + +创建ClickHouse数据库,包含MySQL中所有的表,以及这些表中的所有数据。 + +ClickHouse服务器作为MySQL副本工作。它读取binlog并执行DDL和DML查询。 + +## 创建数据库 {#creating-a-database} + +``` sql +CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] +ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...] 
+[TABLE OVERRIDE table1 (...), TABLE OVERRIDE table2 (...)] +``` + +**引擎参数** + +- `host:port` — MySQL 服务地址. +- `database` — MySQL 数据库名称. +- `user` — MySQL 用户名. +- `password` — MySQL 用户密码. + +**引擎配置** + + +- `max_rows_in_buffer` — 允许在内存中缓存数据的最大行数(对于单个表和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值:`65 505`。 +- `max_bytes_in_buffer` - 允许在内存中缓存数据的最大字节数(对于单个表和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值: `1 048 576 `。 +- `max_rows_in_buffers` - 允许在内存中缓存数据的最大行数(用于数据库和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值: `65 505`。 +- `max_bytes_in_buffers` - 允许在内存中缓存数据的最大字节数(用于数据库和无法查询的缓存数据)。当超过这个数字时,数据将被物化。默认值: `1 048 576`。 +- `max_flush_data_time ` - 允许数据在内存中缓存的最大毫秒数(对于数据库和无法查询的缓存数据)。当超过这个时间,数据将被物化。默认值: `1000`。 +- `max_wait_time_when_mysql_unavailable` - MySQL不可用时的重试间隔(毫秒)。负值禁用重试。默认值:`1000`。 +— `allows_query_when_mysql_lost `—允许在MySQL丢失时查询物化表。默认值:`0`(`false`)。 + +```sql +CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***') + SETTINGS + allows_query_when_mysql_lost=true, + max_wait_time_when_mysql_unavailable=10000; +``` + +**MySQL服务器端配置** + +为了`MaterializedMySQL`的正确工作,有一些必须设置的`MySQL`端配置设置: + +- `default_authentication_plugin = mysql_native_password `,因为 `MaterializedMySQL` 只能授权使用该方法。 +- `gtid_mode = on`,因为基于GTID的日志记录是提供正确的 `MaterializedMySQL`复制的强制要求。 + +!!! attention "注意" + 当打开`gtid_mode`时,您还应该指定`enforce_gtid_consistency = on`。 + +## 虚拟列 {#virtual-columns} + +当使用`MaterializeMySQL`数据库引擎时,[ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md)表与虚拟的`_sign`和`_version`列一起使用。 + +- `_version` — 事务版本. 类型 [UInt64](../../sql-reference/data-types/int-uint.md). +- `_sign` — 删除标记. 类型 [Int8](../../sql-reference/data-types/int-uint.md). 可能的值: + - `1` — 行没有删除, + - `-1` — 行已被删除. + +## 支持的数据类型 {#data_types-support} + +| MySQL | ClickHouse | +|-------------------------|--------------------------------------------------------------| +| TINY | [Int8](../../sql-reference/data-types/int-uint.md) | +| SHORT | [Int16](../../sql-reference/data-types/int-uint.md) | +| INT24 | [Int32](../../sql-reference/data-types/int-uint.md) | +| LONG | [UInt32](../../sql-reference/data-types/int-uint.md) | +| LONGLONG | [UInt64](../../sql-reference/data-types/int-uint.md) | +| FLOAT | [Float32](../../sql-reference/data-types/float.md) | +| DOUBLE | [Float64](../../sql-reference/data-types/float.md) | +| DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) | +| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | +| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | +| DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | +| YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) | +| TIME | [Int64](../../sql-reference/data-types/int-uint.md) | +| ENUM | [Enum](../../sql-reference/data-types/enum.md) | +| STRING | [String](../../sql-reference/data-types/string.md) | +| VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) | +| BLOB | [String](../../sql-reference/data-types/string.md) | +| GEOMETRY | [String](../../sql-reference/data-types/string.md) | +| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) | +| BIT | [UInt64](../../sql-reference/data-types/int-uint.md) | +| SET | [UInt64](../../sql-reference/data-types/int-uint.md) | + +[Nullable](../../sql-reference/data-types/nullable.md) 已经被支持. 
+ +MySQL中的Time 类型,会被ClickHouse转换成微秒来存储 + +不支持其他类型。如果MySQL表包含此类类型的列,ClickHouse抛出异常"Unhandled data type"并停止复制。 + +## 规范和推荐用法 {#specifics-and-recommendations} + +### 兼容性限制 {#compatibility-restrictions} + +除了数据类型的限制之外,还有一些限制与`MySQL`数据库相比有所不同,这应该在复制之前解决: + +- `MySQL` 中的每个表都应该包含 `PRIMARY KEY`。 +- 对于表的复制,那些包含 `ENUM` 字段值超出范围的行(在 `ENUM` 签名中指定)将不起作用。 + +### DDL Queries {#ddl-queries} + +MySQL DDL 语句会被转换成对应的ClickHouse DDL 语句,比如: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). 如果ClickHouse 无法解析某些语句DDL 操作,则会跳过。 + + +### 数据复制 {#data-replication} + +MaterializedMySQL不支持直接的 `INSERT`, `DELETE` 和 `UPDATE` 查询。然而,它们在数据复制方面得到了支持: + +- MySQL `INSERT`查询被转换为`_sign=1`的INSERT查询。 +- MySQL `DELETE`查询被转换为`INSERT`,并且`_sign=-1`。 +- 如果主键被修改了,MySQL的 `UPDATE` 查询将被转换为 `INSERT` 带 `_sign=1` 和INSERT 带有_sign=-1;如果主键没有被修改,则转换为`INSERT`和`_sign=1`。 + +### MaterializedMySQL 数据表查询 {#select} + +`SELECT` 查询从 `MaterializedMySQL`表有一些细节: + + - 如果在SELECT查询中没有指定`_version`,则 [FINAL](../../sql-reference/statements/select/from.md#select-from- FINAL)修饰符被使用,所以只有带有 `MAX(_version)`的行会返回每个主键值。 + + - 如果在SELECT查询中没有指定 `_sign`,则默认使用 `WHERE _sign=1 `。所以被删除的行不是 +包含在结果集中。 + + - 结果包括列注释,以防MySQL数据库表中存在这些列注释。 + +### 索引转换 {#index-conversion} + +在ClickHouse表中,MySQL的 `PRIMARY KEY` 和 `INDEX` 子句被转换为 `ORDER BY` 元组。 + +ClickHouse只有一个物理排序,由 `order by` 条件决定。要创建一个新的物理排序,请使用[materialized views](../../sql-reference/statements/create/view.md#materialized)。 + +**注意** + +- `_sign=-1` 的行不会被物理地从表中删除。 +- 级联 `UPDATE/DELETE` 查询不支持 `MaterializedMySQL` 引擎,因为他们在 MySQL binlog中不可见的 +— 复制很容易被破坏。 +— 禁止对数据库和表进行手工操作。 +- `MaterializedMySQL` 受[optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert)设置的影响。当MySQL服务器中的一个表发生变化时,数据会合并到 `MaterializedMySQL` 数据库中相应的表中。 + +### 表重写 {#table-overrides} + +表覆盖可用于自定义ClickHouse DDL查询,从而允许您对应用程序进行模式优化。这对于控制分区特别有用,分区对MaterializedMySQL的整体性能非常重要。 + +这些是你可以对MaterializedMySQL表重写的模式转换操作: + + * 修改列类型。必须与原始类型兼容,否则复制将失败。例如,可以将`UInt32`列修改为`UInt64`,不能将 `String` 列修改为 `Array(String)`。 + * 修改 [column TTL](../table-engines/mergetree-family/mergetree/#mergetree-column-ttl). + * 修改 [column compression codec](../../sql-reference/statements/create/table/#codecs). + * 增加 [ALIAS columns](../../sql-reference/statements/create/table/#alias). + * 增加 [skipping indexes](../table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes) + * 增加 [projections](../table-engines/mergetree-family/mergetree/#projections). + 请注意,当使用 `SELECT ... FINAL ` (MaterializedMySQL默认是这样做的) 时,预测优化是被禁用的,所以这里是受限的, `INDEX ... TYPE hypothesis `[在v21.12的博客文章中描述]](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/)可能在这种情况下更有用。 + * 修改 [PARTITION BY](../table-engines/mergetree-family/custom-partitioning-key/) + * 修改 [ORDER BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses) + * 修改 [PRIMARY KEY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses) + * 增加 [SAMPLE BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses) + * 增加 [table TTL](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses) + +```sql +CREATE DATABASE db_name ENGINE = MaterializedMySQL(...) +[SETTINGS ...] +[TABLE OVERRIDE table_name ( + [COLUMNS ( + [col_name [datatype] [ALIAS expr] [CODEC(...)] [TTL expr], ...] + [INDEX index_name expr TYPE indextype[(...)] GRANULARITY val, ...] + [PROJECTION projection_name (SELECT [GROUP BY] [ORDER BY]), ...] 
+ )] + [ORDER BY expr] + [PRIMARY KEY expr] + [PARTITION BY expr] + [SAMPLE BY expr] + [TTL expr] +), ...] +``` + +示例: + +```sql +CREATE DATABASE db_name ENGINE = MaterializedMySQL(...) +TABLE OVERRIDE table1 ( + COLUMNS ( + userid UUID, + category LowCardinality(String), + timestamp DateTime CODEC(Delta, Default) + ) + PARTITION BY toYear(timestamp) +), +TABLE OVERRIDE table2 ( + COLUMNS ( + client_ip String TTL created + INTERVAL 72 HOUR + ) + SAMPLE BY ip_hash +) +``` + + +`COLUMNS`列表是稀疏的;根据指定修改现有列,添加额外的ALIAS列。不可能添加普通列或实体化列。具有不同类型的已修改列必须可从原始类型赋值。在执行`CREATE DATABASE` 查询时,目前还没有验证这个或类似的问题,因此需要格外小心。 + +您可以为还不存在的表指定重写。 + +!!! warning "警告" + 如果使用时不小心,很容易用表重写中断复制。例如: + + * 如果一个ALIAS列被添加了一个表覆盖,并且一个具有相同名称的列后来被添加到源MySQL表,在ClickHouse中转换后的ALTER table查询将失败并停止复制。 + * 目前可以添加引用可空列的覆盖,而非空列是必需的,例如 `ORDER BY` 或 `PARTITION BY`。这将导致CREATE TABLE查询失败,也会导致复制停止。 + +## 使用示例 {#examples-of-use} + + MySQL 查询语句: + +``` sql +mysql> CREATE DATABASE db; +mysql> CREATE TABLE db.test (a INT PRIMARY KEY, b INT); +mysql> INSERT INTO db.test VALUES (1, 11), (2, 22); +mysql> DELETE FROM db.test WHERE a=1; +mysql> ALTER TABLE db.test ADD COLUMN c VARCHAR(16); +mysql> UPDATE db.test SET c='Wow!', b=222; +mysql> SELECT * FROM test; +``` + +```text +┌─a─┬───b─┬─c────┐ +│ 2 │ 222 │ Wow! │ +└───┴─────┴──────┘ +``` + +ClickHouse中的数据库,与MySQL服务器交换数据: + +创建的数据库和表: + +``` sql +CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***'); +SHOW TABLES FROM mysql; +``` + +``` text +┌─name─┐ +│ test │ +└──────┘ +``` + +数据插入之后: + +``` sql +SELECT * FROM mysql.test; +``` + +``` text +┌─a─┬──b─┐ +│ 1 │ 11 │ +│ 2 │ 22 │ +└───┴────┘ +``` + +删除数据后,添加列并更新: + +``` sql +SELECT * FROM mysql.test; +``` + +``` text +┌─a─┬───b─┬─c────┐ +│ 2 │ 222 │ Wow! │ +└───┴─────┴──────┘ +``` + +[来源文章](https://clickhouse.com/docs/en/engines/database-engines/materialized-mysql/) diff --git a/docs/zh/engines/database-engines/postgresql.md b/docs/zh/engines/database-engines/postgresql.md index 12b8133f404..4d2af9182f9 100644 --- a/docs/zh/engines/database-engines/postgresql.md +++ b/docs/zh/engines/database-engines/postgresql.md @@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac - `database` — 远程数据库名次 - `user` — PostgreSQL用户名称 - `password` — PostgreSQL用户密码 +- `schema` - PostgreSQL 模式 - `use_table_cache` — 定义数据库表结构是否已缓存或不进行。可选的。默认值: `0`. 
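A sketch of a database definition that uses the new `schema` argument; treating the schema name as the optional fifth argument (before `use_table_cache`) is an assumption based on the parameter list above, and all connection details are placeholders:

```sql
-- Hypothetical connection settings; 'myschema' restricts the engine to one PostgreSQL schema
CREATE DATABASE postgres_db
ENGINE = PostgreSQL('postgres-host:5432', 'mydb', 'pguser', 'secret', 'myschema', 1);

SHOW TABLES FROM postgres_db;
```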
## 支持的数据类型 {#data_types-support} diff --git a/docs/zh/engines/database-engines/replicated.md b/docs/zh/engines/database-engines/replicated.md index 9ffebe04571..bd5841491dd 100644 --- a/docs/zh/engines/database-engines/replicated.md +++ b/docs/zh/engines/database-engines/replicated.md @@ -31,6 +31,7 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na 当创建数据库的新副本时,该副本会自己创建表。如果副本已经不可用很长一段时间,并且已经滞后于复制日志-它用ZooKeeper中的当前元数据检查它的本地元数据,将带有数据的额外表移动到一个单独的非复制数据库(以免意外地删除任何多余的东西),创建缺失的表,如果表名已经被重命名,则更新表名。数据在`ReplicatedMergeTree`级别被复制,也就是说,如果表没有被复制,数据将不会被复制(数据库只负责元数据)。 +允许[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md)查询,但不允许复制。数据库引擎将只向当前副本添加/获取/删除分区/部件。但是,如果表本身使用了Replicated表引擎,那么数据将在使用`ATTACH`后被复制。 ## 使用示例 {#usage-example} 创建三台主机的集群: diff --git a/docs/zh/operations/system-tables/query_thread_log.md b/docs/zh/operations/system-tables/query_thread_log.md index 33583f3b730..64f9ed27393 100644 --- a/docs/zh/operations/system-tables/query_thread_log.md +++ b/docs/zh/operations/system-tables/query_thread_log.md @@ -1,67 +1,62 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- - # 系统。query_thread_log {#system_tables-query_thread_log} 包含有关执行查询的线程的信息,例如,线程名称、线程开始时间、查询处理的持续时间。 -开始记录: +开启日志功能: -1. 在配置参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 科。 -2. 设置 [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) 到1。 +1. 在配置参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 部分。 +2. 设置 [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) 为1。 -数据的冲洗周期设置在 `flush_interval_milliseconds` 的参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。 要强制冲洗,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询。 +数据从缓存写入数据表周期时间参数 `flush_interval_milliseconds` 位于 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。如果需要强制从缓存写入数据表,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询请求。 -ClickHouse不会自动从表中删除数据。 看 [导言](../../operations/system-tables/index.md#system-tables-introduction) 欲了解更多详情。 +ClickHouse不会自动从表中删除数据。 欲了解更多详情,请参照 [介绍](../../operations/system-tables/index.md#system-tables-introduction)。 列: -- `event_date` ([日期](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. -- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. -- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — Start time of query execution. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. -- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. -- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. 
-- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询,写入的行数。 对于其他查询,列值为0。 -- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询时,写入的字节数。 对于其他查询,列值为0。 -- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. -- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. -- `thread_name` ([字符串](../../sql-reference/data-types/string.md)) — Name of the thread. -- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID. -- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID. -- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread. -- `query` ([字符串](../../sql-reference/data-types/string.md)) — Query string. -- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values: - - 1 — Query was initiated by the client. - - 0 — Query was initiated by another query for distributed query execution. -- `user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. -- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the query. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query. -- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. -- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query. -- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. 
Possible values: +- `event_date` ([日期](../../sql-reference/data-types/date.md)) — 该查询线程执行完成的日期。 +- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — 该查询线程执行完成的时间。 +- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — 查询的开始时间。 +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 查询执行持续的时间。 +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读取的行数。 +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读取的字节数。 +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 对于 `INSERT` 查询,写入的行数。 对于其他查询,为0。 +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 对于 `INSERT` 查询,写入的字节数。 对于其他查询,为0。 +- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — 在线程上下文,分配的内存和空闲内存之差。 +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — 在线程上下文,分配的内存和空闲内存之差的最大值。 +- `thread_name` ([字符串](../../sql-reference/data-types/string.md)) — 线程名。 +- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 内部线程ID。 +- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — 线程ID。 +- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS初始线程的初始ID。 +- `query` ([字符串](../../sql-reference/data-types/string.md)) — 查询语句。 +- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 查询类型,可能的值: + - 1 — 由用户发起的查询。 + - 0 — 由其他查询发起的分布式查询。 +- `user` ([字符串](../../sql-reference/data-types/string.md)) — 发起查询的用户名。 +- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — 查询的ID。 +- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起查询的IP地址。 +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的端口。 +- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的用户名(对于分布式查询)。 +- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的ID(对于分布式查询)。 +- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起该查询的父查询IP地址。 +- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起该查询的父查询端口。 +- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的界面,可能的值: - 1 — TCP. - 2 — HTTP. -- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — OS's username who runs [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md). -- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或者运行另一个TCP客户端。 -- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — The [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端名称。 -- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端版本。 -- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. 
Possible values: - - 0 — The query was launched from the TCP interface. +- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — 使用 [clickhouse-client](../../interfaces/cli.md) 的系统用户名。 +- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — 运行 [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的主机名。 +- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的名称。 +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的修订号。 +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的主版本号。 +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的次版本号。 +- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的补丁版本号。 +- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的HTTP方法,可能的值: + - 0 — 查询通过TCP界面发起。 - 1 — `GET` 方法被使用。 - 2 — `POST` 方法被使用。 -- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — The `UserAgent` http请求中传递的标头。 -- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — The “quota key” 在指定 [配额](../../operations/quotas.md) 设置(见 `keyed`). -- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. -- `ProfileEvents` ([数组(字符串, UInt64)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [系统。活动](#system_tables-events). +- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — `UserAgent` HTTP请求中传递的UA表头。 +- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — “quota key” 在 [配额](../../operations/quotas.md) 设置内(详见 `keyed`). +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse 修订版本号. +- `ProfileEvents` ([数组(字符串, UInt64)](../../sql-reference/data-types/array.md)) — 对于该线程的多个指标计数器。这一项可以参考 [system.events](#system_tables-events). **示例** @@ -113,4 +108,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr **另请参阅** -- [系统。query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` 系统表,其中包含有关查询执行的公共信息。 +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — `query_log` 系统表描述,其中包含有关查询执行的公共信息。 +- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — 这个表包含在查询线程中使用的各个视图的信息。 diff --git a/docs/zh/sql-reference/data-types/int-uint.md b/docs/zh/sql-reference/data-types/int-uint.md index 3fb482639e7..e7fa27dcf70 100644 --- a/docs/zh/sql-reference/data-types/int-uint.md +++ b/docs/zh/sql-reference/data-types/int-uint.md @@ -1,17 +1,41 @@ -# UInt8,UInt16,UInt32,UInt64,Int8,Int16,Int32,Int64 {#uint8-uint16-uint32-uint64-int8-int16-int32-int64} +--- +toc_priority: 40 +toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 +--- + +# UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 固定长度的整型,包括有符号整型或无符号整型。 +创建表时,可以为整数设置类型参数 (例如. `TINYINT(8)`, `SMALLINT(16)`, `INT(32)`, `BIGINT(64)`), 但 ClickHouse 会忽略它们. 
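A quick illustration of the note above: the length arguments on the MySQL-style aliases are parsed but ignored, so the columns come out as plain fixed-width integers (the table name is a placeholder):

```sql
CREATE TABLE int_alias_demo
(
    a TINYINT(8),   -- stored as Int8
    b INT(32),      -- stored as Int32
    c BIGINT(64)    -- stored as Int64
)
ENGINE = Memory;

DESCRIBE TABLE int_alias_demo;
```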
+ + ## 整型范围 {#int-ranges} -- Int8-\[-128:127\] -- Int16-\[-32768:32767\] -- Int32-\[-2147483648:2147483647\] -- Int64-\[-9223372036854775808:9223372036854775807\] + +- `Int8` — \[-128 : 127\] +- `Int16` — \[-32768 : 32767\] +- `Int32` — \[-2147483648 : 2147483647\] +- `Int64` — \[-9223372036854775808 : 9223372036854775807\] +- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\] +- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\] + +别名: + +- `Int8` — `TINYINT`, `BOOL`, `BOOLEAN`, `INT1`. +- `Int16` — `SMALLINT`, `INT2`. +- `Int32` — `INT`, `INT4`, `INTEGER`. +- `Int64` — `BIGINT`. ## 无符号整型范围 {#uint-ranges} -- UInt8-\[0:255\] -- UInt16-\[0:65535\] -- UInt32-\[0:4294967295\] -- UInt64-\[0:18446744073709551615\] + +- `UInt8` — \[0 : 255\] +- `UInt16` — \[0 : 65535\] +- `UInt32` — \[0 : 4294967295\] +- `UInt64` — \[0 : 18446744073709551615\] +- `UInt128` — \[0 : 340282366920938463463374607431768211455\] +- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\] + + +[源文档](https://clickhouse.com/docs/en/data_types/int_uint/) diff --git a/docs/zh/whats-new/roadmap.md b/docs/zh/whats-new/roadmap.md index 3cb9dd6fa2f..8e8873c8ee4 100644 --- a/docs/zh/whats-new/roadmap.md +++ b/docs/zh/whats-new/roadmap.md @@ -5,6 +5,6 @@ toc_title: Roadmap # Roadmap {#roadmap} -`2021年Roadmap`已公布供公开讨论查看[这里](https://github.com/ClickHouse/ClickHouse/issues/17623). +`2022年Roadmap`已公布供公开讨论查看 [这里](https://github.com/ClickHouse/ClickHouse/issues/32513). {## [源文章](https://clickhouse.com/docs/en/roadmap/) ##} diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1a5c7d3e492..86bf4a007a8 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -481,48 +481,76 @@ catch (...) void Client::connect() { - connection_parameters = ConnectionParameters(config()); - - if (is_interactive) - std::cout << "Connecting to " - << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " - : "") - << connection_parameters.host << ":" << connection_parameters.port - << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; + UInt16 default_port = ConnectionParameters::getPortFromConfig(config()); + connection_parameters = ConnectionParameters(config(), hosts_ports[0].host, + hosts_ports[0].port.value_or(default_port)); String server_name; UInt64 server_version_major = 0; UInt64 server_version_minor = 0; UInt64 server_version_patch = 0; - try + for (size_t attempted_address_index = 0; attempted_address_index < hosts_ports.size(); ++attempted_address_index) { - connection = Connection::createConnection(connection_parameters, global_context); + connection_parameters.host = hosts_ports[attempted_address_index].host; + connection_parameters.port = hosts_ports[attempted_address_index].port.value_or(default_port); - if (max_client_network_bandwidth) + if (is_interactive) + std::cout << "Connecting to " + << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " + : "") + << connection_parameters.host << ":" << connection_parameters.port + << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." 
<< std::endl; + + try { - ThrottlerPtr throttler = std::make_shared(max_client_network_bandwidth, 0, ""); - connection->setThrottler(throttler); - } + connection = Connection::createConnection(connection_parameters, global_context); - connection->getServerVersion( - connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision); - } - catch (const Exception & e) - { - /// It is typical when users install ClickHouse, type some password and instantly forget it. - if ((connection_parameters.user.empty() || connection_parameters.user == "default") - && e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED) + if (max_client_network_bandwidth) + { + ThrottlerPtr throttler = std::make_shared(max_client_network_bandwidth, 0, ""); + connection->setThrottler(throttler); + } + + connection->getServerVersion( + connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision); + config().setString("host", connection_parameters.host); + config().setInt("port", connection_parameters.port); + break; + } + catch (const Exception & e) { - std::cerr << std::endl - << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl - << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl - << "and deleting this file will reset the password." << std::endl - << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl - << std::endl; - } + /// It is typical when users install ClickHouse, type some password and instantly forget it. + /// This problem can't be fixed with reconnection so it is not attempted + if ((connection_parameters.user.empty() || connection_parameters.user == "default") + && e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED) + { + std::cerr << std::endl + << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl + << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl + << "and deleting this file will reset the password." << std::endl + << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl + << std::endl; + throw; + } + else + { + if (attempted_address_index == hosts_ports.size() - 1) + throw; - throw; + if (is_interactive) + { + std::cerr << "Connection attempt to database at " + << connection_parameters.host << ":" << connection_parameters.port + << " resulted in failure" + << std::endl + << getExceptionMessage(e, false) + << std::endl + << "Attempting connection to the next provided address" + << std::endl; + } + } + } } server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch); @@ -966,8 +994,11 @@ void Client::addOptions(OptionsDescription & options_description) /// Main commandline options related to client functionality and all parameters from Settings. options_description.main_description->add_options() ("config,c", po::value(), "config-file path (another shorthand)") - ("host,h", po::value()->default_value("localhost"), "server host") - ("port", po::value()->default_value(9000), "server port") + ("host,h", po::value>()->multitoken()->default_value({{"localhost"}}, "localhost"), + "list of server hosts with optionally assigned port to connect. 
List elements are separated by a space." + "Every list element looks like '[:]'. If port isn't assigned, connection is made by port from '--port' param" + "Example of usage: '-h host1:1 host2 host3:3'") + ("port", po::value()->default_value(9000), "server port, which is default port for every host from '--host' param") ("secure,s", "Use TLS connection") ("user,u", po::value()->default_value("default"), "user") /** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown. @@ -1074,8 +1105,8 @@ void Client::processOptions(const OptionsDescription & options_description, if (options.count("config")) config().setString("config-file", options["config"].as()); - if (options.count("host") && !options["host"].defaulted()) - config().setString("host", options["host"].as()); + if (options.count("host")) + hosts_ports = options["host"].as>(); if (options.count("interleave-queries-file")) interleave_queries_files = options["interleave-queries-file"].as>(); if (options.count("port") && !options["port"].defaulted()) diff --git a/programs/server/config.xml b/programs/server/config.xml index ce0c54f6730..def64607caf 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -217,13 +217,12 @@ /path/to/ssl_ca_cert_file - - deflate + none - - medium + + 0 -1 diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 55b8359d385..0b69bd5fd0e 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -86,7 +86,7 @@ enum class AccessType M(CREATE_DICTIONARY, "", DICTIONARY, CREATE) /* allows to execute {CREATE|ATTACH} DICTIONARY */\ M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables; implicitly enabled by the grant CREATE_TABLE on any table */ \ - M(CREATE_FUNCTION, "", DATABASE, CREATE) /* allows to execute CREATE FUNCTION */ \ + M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \ M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \ \ M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\ @@ -94,7 +94,7 @@ enum class AccessType M(DROP_VIEW, "", VIEW, DROP) /* allows to execute {DROP|DETACH} TABLE for views; implicitly enabled by the grant DROP_TABLE */\ M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\ - M(DROP_FUNCTION, "", DATABASE, DROP) /* allows to execute DROP FUNCTION */\ + M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\ M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\ \ M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \ @@ -113,9 +113,9 @@ enum class AccessType M(ALTER_ROLE, "", GLOBAL, ACCESS_MANAGEMENT) \ M(DROP_ROLE, "", GLOBAL, ACCESS_MANAGEMENT) \ M(ROLE_ADMIN, "", GLOBAL, ACCESS_MANAGEMENT) /* allows to grant and revoke the roles which are not granted to the current user with admin option */\ - M(CREATE_ROW_POLICY, "CREATE POLICY", GLOBAL, ACCESS_MANAGEMENT) \ - M(ALTER_ROW_POLICY, "ALTER POLICY", GLOBAL, ACCESS_MANAGEMENT) \ - M(DROP_ROW_POLICY, "DROP POLICY", GLOBAL, ACCESS_MANAGEMENT) \ + M(CREATE_ROW_POLICY, "CREATE POLICY", TABLE, ACCESS_MANAGEMENT) \ + M(ALTER_ROW_POLICY, "ALTER POLICY", TABLE, ACCESS_MANAGEMENT) \ + M(DROP_ROW_POLICY, "DROP POLICY", TABLE, ACCESS_MANAGEMENT) \ M(CREATE_QUOTA, "", GLOBAL, ACCESS_MANAGEMENT) \ M(ALTER_QUOTA, "", GLOBAL, ACCESS_MANAGEMENT) \ M(DROP_QUOTA, "", GLOBAL, ACCESS_MANAGEMENT) \ @@ -124,7 +124,7 @@ enum class AccessType M(DROP_SETTINGS_PROFILE, "DROP 
PROFILE", GLOBAL, ACCESS_MANAGEMENT) \ M(SHOW_USERS, "SHOW CREATE USER", GLOBAL, SHOW_ACCESS) \ M(SHOW_ROLES, "SHOW CREATE ROLE", GLOBAL, SHOW_ACCESS) \ - M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", GLOBAL, SHOW_ACCESS) \ + M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", TABLE, SHOW_ACCESS) \ M(SHOW_QUOTAS, "SHOW CREATE QUOTA", GLOBAL, SHOW_ACCESS) \ M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \ M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \ diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 400ee55a35d..744c3571175 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -425,6 +425,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args | AccessType::TRUNCATE; const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; + const AccessFlags function_ddl = AccessType::CREATE_FUNCTION | AccessType::DROP_FUNCTION; const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; @@ -432,7 +433,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY; const AccessFlags not_readonly_1_flags = AccessType::CREATE_TEMPORARY_TABLE; - const AccessFlags ddl_flags = table_ddl | dictionary_ddl; + const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl; const AccessFlags introspection_flags = AccessType::INTROSPECTION; }; static const PrecalculatedFlags precalc; diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index 2881825dd17..3f3abff4e87 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -45,7 +45,15 @@ TEST(AccessRights, Union) lhs.grant(AccessType::INSERT); rhs.grant(AccessType::ALL, "db1"); lhs.makeUnion(rhs); - ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, CREATE FUNCTION, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); + ASSERT_EQ(lhs.toString(), + "GRANT INSERT ON *.*, " + "GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, " + "CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, " + "TRUNCATE, OPTIMIZE, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, " + "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " + "SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " + "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " + "SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); } diff --git a/src/AggregateFunctions/ReservoirSampler.h b/src/AggregateFunctions/ReservoirSampler.h index 1d7529ee8e1..5f7ac13d908 100644 --- a/src/AggregateFunctions/ReservoirSampler.h +++ b/src/AggregateFunctions/ReservoirSampler.h @@ -239,6 +239,7 @@ private: UInt64 
genRandom(size_t lim) { + assert(lim > 0); /// With a large number of values, we will generate random numbers several times slower. if (lim <= static_cast(rng.max())) return static_cast(rng()) % static_cast(lim); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 89ff019ba6e..ad86121a924 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1317,7 +1317,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (insert && insert->select) insert->tryFindInputFunction(input_function); - bool is_async_insert = global_context->getSettings().async_insert && insert && insert->hasInlinedData(); + bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert) @@ -1929,7 +1929,7 @@ void ClientBase::init(int argc, char ** argv) /// Output of help message. if (options.count("help") - || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. + || (options.count("host") && options["host"].as>()[0].host == "elp")) /// If user writes -help instead of --help. { printHelpMessage(options_description); exit(0); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index a3bd2c19d0f..e74a6a47d76 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -243,6 +244,25 @@ protected: } profile_events; QueryProcessingStage::Enum query_processing_stage; + + struct HostPort + { + String host; + std::optional port{}; + friend std::istream & operator>>(std::istream & in, HostPort & hostPort) + { + String host_with_port; + in >> host_with_port; + DB::DNSResolver & resolver = DB::DNSResolver::instance(); + std::pair> + host_and_port = resolver.resolveHostOrAddress(host_with_port); + hostPort.host = host_and_port.first.toString(); + hostPort.port = host_and_port.second; + + return in; + } + }; + std::vector hosts_ports{}; }; } diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index dbd463583f5..55569f080f6 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -23,15 +23,13 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config) +ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config, + std::string connection_host, + int connection_port) : host(connection_host), port(connection_port) { bool is_secure = config.getBool("secure", false); security = is_secure ? Protocol::Secure::Enable : Protocol::Secure::Disable; - host = config.getString("host", "localhost"); - port = config.getInt( - "port", config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); - default_database = config.getString("database", ""); /// changed the default value to "default" to fix the issue when the user in the prompt is blank @@ -61,12 +59,25 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati /// By default compression is disabled if address looks like localhost. compression = config.getBool("compression", !isLocalAddress(DNSResolver::instance().resolveHost(host))) - ? 
Protocol::Compression::Enable : Protocol::Compression::Disable; + ? Protocol::Compression::Enable : Protocol::Compression::Disable; timeouts = ConnectionTimeouts( - Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0)); + Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0)); +} + +ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config) + : ConnectionParameters(config, config.getString("host", "localhost"), getPortFromConfig(config)) +{ +} + +int ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + bool is_secure = config.getBool("secure", false); + return config.getInt("port", + config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", + is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); } } diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index a169df8390a..dc509049c83 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -24,6 +24,9 @@ struct ConnectionParameters ConnectionParameters() {} ConnectionParameters(const Poco::Util::AbstractConfiguration & config); + ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, int port); + + static int getPortFromConfig(const Poco::Util::AbstractConfiguration & config); }; } diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index c18887b7a13..c4d75fed129 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -50,12 +50,12 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && if (!offsets_concrete) throw Exception("offsets_column must be a ColumnUInt64", ErrorCodes::LOGICAL_ERROR); - if (!offsets_concrete->empty() && nested_column) + if (!offsets_concrete->empty() && data) { Offset last_offset = offsets_concrete->getData().back(); /// This will also prevent possible overflow in offset. - if (nested_column->size() != last_offset) + if (data->size() != last_offset) throw Exception("offsets_column has data inconsistent with nested_column", ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Common/ArenaUtils.h b/src/Common/ArenaUtils.h new file mode 100644 index 00000000000..0a588692367 --- /dev/null +++ b/src/Common/ArenaUtils.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include +#include + +/** Copy string value into Arena. + * Arena should support method: + * char * alloc(size_t size). 
+ */ +template +inline StringRef copyStringInArena(Arena & arena, StringRef value) +{ + size_t key_size = value.size; + char * place_for_key = arena.alloc(key_size); + memcpy(reinterpret_cast(place_for_key), reinterpret_cast(value.data), key_size); + StringRef result{place_for_key, key_size}; + + return result; +} diff --git a/src/Common/ColumnsHashing.h b/src/Common/ColumnsHashing.h index f32707798f7..fbd3e71f9b8 100644 --- a/src/Common/ColumnsHashing.h +++ b/src/Common/ColumnsHashing.h @@ -387,47 +387,52 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod } template - ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool) + ALWAYS_INLINE FindResult findKey(Data & data, size_t row_, Arena & pool) { size_t row = getIndexAt(row_); if (is_nullable && row == 0) { if constexpr (has_mapped) - return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData()); + return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData(), 0); else - return FindResult(data.hasNullKeyData()); + return FindResult(data.hasNullKeyData(), 0); } if (visit_cache[row] != VisitValue::Empty) { if constexpr (has_mapped) - return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found); + return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found, 0); else - return FindResult(visit_cache[row] == VisitValue::Found); + return FindResult(visit_cache[row] == VisitValue::Found, 0); } auto key_holder = getKeyHolder(row_, pool); - typename Data::iterator it; + typename Data::LookupResult it; if (saved_hash) - it = data.find(*key_holder, saved_hash[row]); + it = data.find(keyHolderGetKey(key_holder), saved_hash[row]); else - it = data.find(*key_holder); + it = data.find(keyHolderGetKey(key_holder)); - bool found = it != data.end(); + bool found = it; visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound; if constexpr (has_mapped) { if (found) - mapped_cache[row] = it->second; + mapped_cache[row] = it->getMapped(); } + size_t offset = 0; + + if constexpr (FindResult::has_offset) + offset = found ? 
data.offsetInternal(it) : 0; + if constexpr (has_mapped) - return FindResult(&mapped_cache[row], found); + return FindResult(&mapped_cache[row], found, offset); else - return FindResult(found); + return FindResult(found, offset); } template diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 13da3efd57a..1fbd1416d67 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -202,6 +202,45 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); } +std::pair> DNSResolver::resolveHostOrAddress(const std::string & host_and_port) +{ + Poco::Net::IPAddress ip; + + size_t number_of_colons = std::count(host_and_port.begin(), host_and_port.end(), ':'); + if (number_of_colons > 1) + { + /// IPv6 host + if (host_and_port.starts_with('[')) + { + size_t close_bracket_pos = host_and_port.find(']'); + assert(close_bracket_pos != std::string::npos); + ip = resolveHost(host_and_port.substr(0, close_bracket_pos)); + + if (close_bracket_pos == host_and_port.size() - 1) + return {ip, std::nullopt}; + if (host_and_port[close_bracket_pos + 1] != ':') + throw Exception("Missing delimiter between host and port", ErrorCodes::BAD_ARGUMENTS); + + unsigned int port; + if (!Poco::NumberParser::tryParseUnsigned(host_and_port.substr(close_bracket_pos + 2), port)) + throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS); + if (port > 0xFFFF) + throw Exception("Port must be less 0xFFFF", ErrorCodes::BAD_ARGUMENTS); + return {ip, port}; + } + return {resolveHost(host_and_port), std::nullopt}; + } + else if (number_of_colons == 1) + { + /// IPv4 host with port + Poco::Net::SocketAddress socket = resolveAddress(host_and_port); + return {socket.host(), socket.port()}; + } + + /// IPv4 host + return {resolveHost(host_and_port), std::nullopt}; +} + String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) { if (impl->disable_cache) diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 3cefa37fd70..4ab422ab4ec 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -34,6 +34,10 @@ public: Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port); + /// Accepts host names like 'example.com'/'example.com:port' or '127.0.0.1'/'127.0.0.1:port' or '::1'/'[::1]:port' + /// and resolves its IP and port, if port is set + std::pair> resolveHostOrAddress(const std::string & host_and_port); + /// Accepts host IP and resolves its host name String reverseResolve(const Poco::Net::IPAddress & address); diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index aa507b1ce59..1e7d1b81b37 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -281,6 +281,10 @@ M(ExternalDataSourceLocalCacheReadBytes, "Bytes read from local cache buffer in RemoteReadBufferCache")\ \ M(MainConfigLoads, "Number of times the main configuration was reloaded.") \ + \ + M(ScalarSubqueriesGlobalCacheHit, "Number of times a read from a scalar subquery was done using the global cache") \ + M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \ + M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely") namespace ProfileEvents { diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h index b1d72578530..c3a6d7a8eff 100644 --- 
a/src/Coordination/SnapshotableHashTable.h +++ b/src/Coordination/SnapshotableHashTable.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,8 @@ private: /// Allows to avoid additional copies in updateValue function size_t snapshot_up_to_size = 0; ArenaWithFreeLists arena; + /// Collect invalid iterators to avoid traversing the whole list + std::vector snapshot_invalid_iters; uint64_t approximate_data_size{0}; @@ -113,17 +116,6 @@ private: } } - StringRef copyStringInArena(const std::string & value_to_copy) - { - size_t value_to_copy_size = value_to_copy.size(); - char * place_for_key = arena.alloc(value_to_copy_size); - memcpy(reinterpret_cast(place_for_key), reinterpret_cast(value_to_copy.data()), value_to_copy_size); - StringRef updated_value{place_for_key, value_to_copy_size}; - - return updated_value; - } - - public: using iterator = typename List::iterator; @@ -137,7 +129,7 @@ public: if (!it) { - ListElem elem{copyStringInArena(key), value, true}; + ListElem elem{copyStringInArena(arena, key), value, true}; auto itr = list.insert(list.end(), elem); bool inserted; map.emplace(itr->key, it, inserted, hash_value); @@ -159,7 +151,7 @@ public: if (it == map.end()) { - ListElem elem{copyStringInArena(key), value, true}; + ListElem elem{copyStringInArena(arena, key), value, true}; auto itr = list.insert(list.end(), elem); bool inserted; map.emplace(itr->key, it, inserted, hash_value); @@ -175,6 +167,7 @@ public: list_itr->active_in_map = false; auto new_list_itr = list.insert(list.end(), elem); it->getMapped() = new_list_itr; + snapshot_invalid_iters.push_back(list_itr); } else { @@ -195,6 +188,7 @@ public: if (snapshot_mode) { list_itr->active_in_map = false; + snapshot_invalid_iters.push_back(list_itr); list_itr->free_key = true; map.erase(it->getKey()); } @@ -235,6 +229,7 @@ public: { auto elem_copy = *(list_itr); list_itr->active_in_map = false; + snapshot_invalid_iters.push_back(list_itr); updater(elem_copy.value); auto itr = list.insert(list.end(), elem_copy); it->getMapped() = itr; @@ -274,23 +269,15 @@ public: void clearOutdatedNodes() { - auto start = list.begin(); - auto end = list.end(); - for (auto itr = start; itr != end;) + for (auto & itr: snapshot_invalid_iters) { - if (!itr->active_in_map) - { - updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0); - if (itr->free_key) - arena.free(const_cast(itr->key.data), itr->key.size); - itr = list.erase(itr); - } - else - { - assert(!itr->free_key); - itr++; - } + assert(!itr->active_in_map); + updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0); + if (itr->free_key) + arena.free(const_cast(itr->key.data), itr->key.size); + list.erase(itr); } + snapshot_invalid_iters.clear(); } void clear() @@ -310,7 +297,6 @@ public: void disableSnapshotMode() { - snapshot_mode = false; snapshot_up_to_size = 0; } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c4b4ab77867..ad7a64783d7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -70,7 +70,9 @@ class IColumn; M(UInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \ M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \ M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \ - M(UInt64, s3_min_upload_part_size, 32*1024*1024, "The minimum size of 
part to upload during multipart upload to S3.", 0) \ + M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ + M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ + M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 1000, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ @@ -262,6 +264,7 @@ class IColumn; M(UInt64, http_max_fields, 1000000, "Maximum number of fields in HTTP header", 0) \ M(UInt64, http_max_field_name_size, 1048576, "Maximum length of field name in HTTP header", 0) \ M(UInt64, http_max_field_value_size, 1048576, "Maximum length of field value in HTTP header", 0) \ + M(Bool, http_skip_not_found_url_for_globs, true, "Skip url's for globs with HTTP_NOT_FOUND error", 0) \ M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \ M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \ M(Bool, joined_subquery_requires_alias, true, "Force joined subqueries and table functions to have aliases for correct name qualification.", 0) \ @@ -426,6 +429,7 @@ class IColumn; M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic, "Default database engine.", 0) \ + M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \ M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ @@ -479,7 +483,6 @@ class IColumn; M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ - M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \ M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, 
insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \ @@ -579,6 +582,7 @@ class IColumn; MAKE_OBSOLETE(M, UInt64, merge_tree_clear_old_parts_interval_seconds, 1) \ MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \ MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \ + MAKE_OBSOLETE(M, Bool, allow_experimental_projection_optimization, true) \ /** The section above is for obsolete settings. Do not add anything there. */ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 5d16f0a5c85..17d24946cd8 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -93,6 +93,16 @@ IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultDatabaseEngine, ErrorCodes::BAD_ARGUME {{"Ordinary", DefaultDatabaseEngine::Ordinary}, {"Atomic", DefaultDatabaseEngine::Atomic}}) +IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS, + {{"None", DefaultTableEngine::None}, + {"Log", DefaultTableEngine::Log}, + {"StripeLog", DefaultTableEngine::StripeLog}, + {"MergeTree", DefaultTableEngine::MergeTree}, + {"ReplacingMergeTree", DefaultTableEngine::ReplacingMergeTree}, + {"ReplicatedMergeTree", DefaultTableEngine::ReplicatedMergeTree}, + {"ReplicatedReplacingMergeTree", DefaultTableEngine::ReplicatedReplacingMergeTree}, + {"Memory", DefaultTableEngine::Memory}}) + IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL, {{"decimal", MySQLDataTypesSupport::DECIMAL}, {"datetime64", MySQLDataTypesSupport::DATETIME64}}) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index d29e4f15c27..27994529a0b 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -120,6 +120,19 @@ enum class DefaultDatabaseEngine DECLARE_SETTING_ENUM(DefaultDatabaseEngine) +enum class DefaultTableEngine +{ + None = 0, /// Disable. Need to use ENGINE = + Log, + StripeLog, + MergeTree, + ReplacingMergeTree, + ReplicatedMergeTree, + ReplicatedReplacingMergeTree, + Memory, +}; + +DECLARE_SETTING_ENUM(DefaultTableEngine) enum class MySQLDataTypesSupport { diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 29591a5f88f..9f56b5f7676 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -77,6 +77,10 @@ std::pair createTableFromAST( /// - the code is simpler, since the query is already brought to a suitable form. if (!ast_create_query.columns_list || !ast_create_query.columns_list->columns) { + if (!ast_create_query.storage || !ast_create_query.storage->engine) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid storage definition in metadata file: " + "it's a bug or result of manual intervention in metadata files"); + if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(ast_create_query.storage->engine->name)) throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); /// Leave columns empty. 
diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 9dbe611537b..8033d65c549 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -316,7 +316,7 @@ getTableOutput(const String & database_name, const String & table_name, ContextM return std::move(res.pipeline); } -static inline String reWriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, const String & database_name, const String & table_name, const Settings & global_settings) +static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, const String & database_name, const String & table_name, const Settings & global_settings) { Block tables_columns_sample_block { @@ -376,7 +376,7 @@ static inline void dumpDataForTables( auto pipeline = getTableOutput(database_name, table_name, query_context); StreamSettings mysql_input_stream_settings(context->getSettingsRef()); - String mysql_select_all_query = "SELECT " + reWriteMysqlQueryColumn(connection, mysql_database_name, table_name, context->getSettings()) + " FROM " + String mysql_select_all_query = "SELECT " + rewriteMysqlQueryColumn(connection, mysql_database_name, table_name, context->getSettingsRef()) + " FROM " + backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name); LOG_INFO(&Poco::Logger::get("MaterializedMySQLSyncThread(" + database_name + ")"), "mysql_select_all_query is {}", mysql_select_all_query); auto input = std::make_unique(connection, mysql_select_all_query, pipeline.getHeader(), mysql_input_stream_settings); diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index d6d04075a3d..566515c7cc8 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -8,10 +8,10 @@ #include #include #include +#include #include #include #include -#include namespace DB diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 5c2b6b27afd..f2d7febfa8e 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -623,17 +623,6 @@ void mergeBlockWithPipe( } } -template -static StringRef copyStringInArena(Arena & arena, StringRef value) -{ - size_t key_size = value.size; - char * place_for_key = arena.alloc(key_size); - memcpy(reinterpret_cast(place_for_key), reinterpret_cast(value.data), key_size); - StringRef result{place_for_key, key_size}; - - return result; -} - /** * Returns ColumnVector data as PaddedPodArray. 
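The per-module copyStringInArena helpers removed here and in SnapshotableHashTable.h are consolidated into the single template added in src/Common/ArenaUtils.h, whose only requirement on the allocator is a char * alloc(size_t) method. The sketch below illustrates that contract in isolation; DummyArena and the minimal StringRef struct are simplified stand-ins for the ClickHouse types, not the real implementations.

// Standalone sketch with simplified stand-ins for ClickHouse's Arena and StringRef.
#include <cstddef>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct StringRef            // minimal stand-in: pointer + size
{
    const char * data = nullptr;
    std::size_t size = 0;
};

struct DummyArena           // minimal stand-in: keeps every allocation alive
{
    std::vector<std::unique_ptr<char[]>> chunks;

    char * alloc(std::size_t size)
    {
        chunks.push_back(std::make_unique<char[]>(size));
        return chunks.back().get();
    }
};

// Same shape as the template added in src/Common/ArenaUtils.h: copy the bytes
// into arena-owned memory and return a reference to the copy.
template <typename Arena>
StringRef copyStringInArena(Arena & arena, StringRef value)
{
    char * place_for_key = arena.alloc(value.size);
    memcpy(place_for_key, value.data, value.size);
    return StringRef{place_for_key, value.size};
}

int main()
{
    DummyArena arena;
    std::string key = "hello";
    StringRef copied = copyStringInArena(arena, StringRef{key.data(), key.size()});
    std::cout << std::string(copied.data, copied.size) << '\n';   // prints "hello"
}

Because any allocator exposing char * alloc(size_t) satisfies the template, the same helper can now serve both the dictionary storages and the Coordination hash table instead of each keeping its own copy.
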
diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 48ddeed7fa6..62598c966e5 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -197,7 +197,7 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory) size_t max_command_execution_time = config.getUInt64(settings_config_prefix + ".max_command_execution_time", 10); - size_t max_execution_time_seconds = static_cast(context->getSettings().max_execution_time.totalSeconds()); + size_t max_execution_time_seconds = static_cast(context->getSettingsRef().max_execution_time.totalSeconds()); if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds) max_command_execution_time = max_execution_time_seconds; diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 40cc735557c..0c82da7b73b 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -13,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index e35340c7618..ea041c63d73 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -1,5 +1,6 @@ #include "HashedArrayDictionary.h" +#include #include #include #include diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index c83735a6330..b70f018df6b 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -1,5 +1,6 @@ #include "HashedDictionary.h" +#include #include #include #include diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 14c8fc7c749..5330bc684c3 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index 292e60f17f9..adbe4084d81 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 2638365c7ad..5cfb4532b65 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -283,6 +283,8 @@ std::unique_ptr DiskS3::writeFile(const String & path, bucket, metadata.remote_fs_root_path + s3_path, settings->s3_min_upload_part_size, + settings->s3_upload_part_size_multiply_factor, + settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), buf_size, @@ -338,6 +340,8 @@ void DiskS3::createFileOperationObject(const String & operation_name, UInt64 rev bucket, remote_fs_root_path + key, settings->s3_min_upload_part_size, + settings->s3_upload_part_size_multiply_factor, + settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, metadata); @@ -417,6 +421,8 @@ void DiskS3::saveSchemaVersion(const int & version) bucket, remote_fs_root_path + SCHEMA_VERSION_OBJECT, settings->s3_min_upload_part_size, + settings->s3_upload_part_size_multiply_factor, + 
settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size); writeIntText(version, buffer); @@ -1076,6 +1082,8 @@ DiskS3Settings::DiskS3Settings( const std::shared_ptr & client_, size_t s3_max_single_read_retries_, size_t s3_min_upload_part_size_, + size_t s3_upload_part_size_multiply_factor_, + size_t s3_upload_part_size_multiply_parts_count_threshold_, size_t s3_max_single_part_upload_size_, size_t min_bytes_for_seek_, bool send_metadata_, @@ -1085,6 +1093,8 @@ DiskS3Settings::DiskS3Settings( : client(client_) , s3_max_single_read_retries(s3_max_single_read_retries_) , s3_min_upload_part_size(s3_min_upload_part_size_) + , s3_upload_part_size_multiply_factor(s3_upload_part_size_multiply_factor_) + , s3_upload_part_size_multiply_parts_count_threshold(s3_upload_part_size_multiply_parts_count_threshold_) , s3_max_single_part_upload_size(s3_max_single_part_upload_size_) , min_bytes_for_seek(min_bytes_for_seek_) , send_metadata(send_metadata_) diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index c5d0722c6c2..698fa6173c2 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -29,6 +29,8 @@ struct DiskS3Settings const std::shared_ptr & client_, size_t s3_max_single_read_retries_, size_t s3_min_upload_part_size_, + size_t s3_upload_part_size_multiply_factor_, + size_t s3_upload_part_size_multiply_parts_count_threshold_, size_t s3_max_single_part_upload_size_, size_t min_bytes_for_seek_, bool send_metadata_, @@ -39,6 +41,8 @@ struct DiskS3Settings std::shared_ptr client; size_t s3_max_single_read_retries; size_t s3_min_upload_part_size; + size_t s3_upload_part_size_multiply_factor; + size_t s3_upload_part_size_multiply_parts_count_threshold; size_t s3_max_single_part_upload_size; size_t min_bytes_for_seek; bool send_metadata; diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index f6824a1b3af..e16626a009a 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -155,6 +155,8 @@ std::unique_ptr getSettings(const Poco::Util::AbstractConfigurat getClient(config, config_prefix, context), config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries), config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size), + config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", context->getSettingsRef().s3_upload_part_size_multiply_factor), + config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold), config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size), config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getBool(config_prefix + ".send_metadata", false), diff --git a/src/Functions/geoToH3.cpp b/src/Functions/geoToH3.cpp index 18951d1a03f..fb7301de776 100644 --- a/src/Functions/geoToH3.cpp +++ b/src/Functions/geoToH3.cpp @@ -11,6 +11,7 @@ #include #include +#include #include @@ -20,6 +21,8 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int INCORRECT_DATA; + extern const int ILLEGAL_COLUMN; + extern const int ARGUMENT_OUT_OF_BOUND; } namespace @@ -68,9 +71,35 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_lon = 
arguments[0].column.get(); - const auto * col_lat = arguments[1].column.get(); - const auto * col_res = arguments[2].column.get(); + const auto * col_lon = checkAndGetColumn(arguments[0].column.get()); + if (!col_lon) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be Float64.", + arguments[0].type->getName(), + 1, + getName()); + const auto & data_lon = col_lon->getData(); + + const auto * col_lat = checkAndGetColumn(arguments[1].column.get()); + if (!col_lat) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be Float64.", + arguments[1].type->getName(), + 2, + getName()); + const auto & data_lat = col_lat->getData(); + + const auto * col_res = checkAndGetColumn(arguments[2].column.get()); + if (!col_res) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt8.", + arguments[2].type->getName(), + 3, + getName()); + const auto & data_res = col_res->getData(); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); @@ -78,9 +107,17 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const double lon = col_lon->getFloat64(row); - const double lat = col_lat->getFloat64(row); - const UInt8 res = col_res->getUInt(row); + const double lon = data_lon[row]; + const double lat = data_lat[row]; + const UInt8 res = data_res[row]; + + if (res > MAX_H3_RES) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + toString(res), + getName(), + MAX_H3_RES); LatLng coord; coord.lng = degsToRads(lon); diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index f6c1158a896..e290cbab36b 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -82,6 +82,7 @@ struct ReadSettings size_t http_max_tries = 1; size_t http_retry_initial_backoff_ms = 100; size_t http_retry_max_backoff_ms = 1600; + bool http_skip_not_found_url_for_globs = true; /// Set to true for MergeTree tables to make sure /// that last position (offset in compressed file) is always passed. diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index ce4d83105c0..4e08a595484 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -129,6 +130,8 @@ namespace detail /// In case of redirects, save result uri to use it if we retry the request. std::optional saved_uri_redirect; + bool http_skip_not_found_url; + ReadSettings settings; Poco::Logger * log; @@ -146,7 +149,7 @@ namespace detail return read_range.begin + offset_from_begin_pos; } - std::istream * call(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_) + std::istream * callImpl(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_) { // With empty path poco will send "POST HTTP/1.1" its bug. 
if (uri_.getPath().empty()) @@ -211,7 +214,7 @@ namespace detail { try { - call(uri, response, Poco::Net::HTTPRequest::HTTP_HEAD); + call(response, Poco::Net::HTTPRequest::HTTP_HEAD); while (isRedirect(response.getStatus())) { @@ -220,7 +223,7 @@ namespace detail session->updateSession(uri_redirect); - istr = call(uri_redirect, response, method); + istr = callImpl(uri_redirect, response, method); } break; @@ -237,6 +240,17 @@ namespace detail return read_range.end; } + enum class InitializeError + { + /// If error is not retriable, `exception` variable must be set. + NON_RETRIABLE_ERROR, + /// Allows to skip not found urls for globs + SKIP_NOT_FOUND_URL, + NONE, + }; + + InitializeError initialization_error = InitializeError::NONE; + public: using NextCallback = std::function; using OutStreamCallback = std::function; @@ -253,7 +267,8 @@ namespace detail Range read_range_ = {}, const RemoteHostFilter & remote_host_filter_ = {}, bool delay_initialization = false, - bool use_external_buffer_ = false) + bool use_external_buffer_ = false, + bool http_skip_not_found_url_ = false) : SeekableReadBufferWithSize(nullptr, 0) , uri {uri_} , method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET} @@ -265,6 +280,7 @@ namespace detail , buffer_size {buffer_size_} , use_external_buffer {use_external_buffer_} , read_range(read_range_) + , http_skip_not_found_url(http_skip_not_found_url_) , settings {settings_} , log(&Poco::Logger::get("ReadWriteBufferFromHTTP")) { @@ -276,18 +292,58 @@ namespace detail "0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})", settings.http_max_tries, settings.http_retry_initial_backoff_ms, settings.http_retry_max_backoff_ms); + // Configure User-Agent if it not already set. + const std::string user_agent = "User-Agent"; + auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), [&user_agent](const HTTPHeaderEntry & entry) + { + return std::get<0>(entry) == user_agent; + }); + + if (iter == http_header_entries.end()) + { + http_header_entries.emplace_back(std::make_pair("User-Agent", fmt::format("ClickHouse/{}", VERSION_STRING))); + } + if (!delay_initialization) + { initialize(); + if (exception) + std::rethrow_exception(exception); + } + } + + void call(Poco::Net::HTTPResponse & response, const String & method_) + { + try + { + istr = callImpl(saved_uri_redirect ? *saved_uri_redirect : uri, response, method_); + } + catch (...) + { + if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND + && http_skip_not_found_url) + { + initialization_error = InitializeError::SKIP_NOT_FOUND_URL; + } + else + { + throw; + } + } } /** - * Note: In case of error return false if error is not retriable, otherwise throw. + * Throws if error is retriable, otherwise sets initialization_error = NON_RETRIABLE_ERROR and + * saves exception into `exception` variable. In case url is not found and skip_not_found_url == true, + * sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws. */ - bool initialize() + void initialize() { Poco::Net::HTTPResponse response; - istr = call(saved_uri_redirect ? 
*saved_uri_redirect : uri, response, method); + call(response, method); + if (initialization_error != InitializeError::NONE) + return; while (isRedirect(response.getStatus())) { @@ -296,7 +352,7 @@ namespace detail session->updateSession(uri_redirect); - istr = call(uri_redirect, response, method); + istr = callImpl(uri_redirect, response, method); saved_uri_redirect = uri_redirect; } @@ -310,7 +366,8 @@ namespace detail Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, "Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? *read_range.end : '-')); - return false; + initialization_error = InitializeError::NON_RETRIABLE_ERROR; + return; } else if (read_range.end) { @@ -345,12 +402,14 @@ namespace detail sess->attachSessionData(e.message()); throw; } - - return true; } bool nextImpl() override { + if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL) + return false; + assert(initialization_error == InitializeError::NONE); + if (next_callback) next_callback(count()); @@ -392,14 +451,16 @@ namespace detail { if (!impl) { - /// If error is not retriable -- false is returned and exception is set. - /// Otherwise the error is thrown and retries continue. - bool initialized = initialize(); - if (!initialized) + initialize(); + if (initialization_error == InitializeError::NON_RETRIABLE_ERROR) { assert(exception); break; } + else if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL) + { + return false; + } if (use_external_buffer) { @@ -570,11 +631,12 @@ public: Range read_range_ = {}, const RemoteHostFilter & remote_host_filter_ = {}, bool delay_initialization_ = true, - bool use_external_buffer_ = false) + bool use_external_buffer_ = false, + bool skip_not_found_url_ = false) : Parent(std::make_shared(uri_, timeouts, max_redirects), uri_, credentials_, method_, out_stream_callback_, buffer_size_, settings_, http_header_entries_, read_range_, remote_host_filter_, - delay_initialization_, use_external_buffer_) + delay_initialization_, use_external_buffer_, skip_not_found_url_) { } }; diff --git a/src/IO/WriteBufferFromHTTP.cpp b/src/IO/WriteBufferFromHTTP.cpp index 5ddc28d2db1..622fab91fcc 100644 --- a/src/IO/WriteBufferFromHTTP.cpp +++ b/src/IO/WriteBufferFromHTTP.cpp @@ -10,6 +10,7 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( const Poco::URI & uri, const std::string & method, const std::string & content_type, + const std::string & content_encoding, const ConnectionTimeouts & timeouts, size_t buffer_size_) : WriteBufferFromOStream(buffer_size_) @@ -24,6 +25,9 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( request.set("Content-Type", content_type); } + if (!content_encoding.empty()) + request.set("Content-Encoding", content_encoding); + LOG_TRACE((&Poco::Logger::get("WriteBufferToHTTP")), "Sending request to {}", uri.toString()); ostr = &session->sendRequest(request); @@ -31,6 +35,10 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( void WriteBufferFromHTTP::finalizeImpl() { + // for compressed body, the data is stored in buffered first + // here, make sure the content in the buffer has been flushed + this->nextImpl(); + receiveResponse(*session, request, response, false); /// TODO: Response body is ignored. 
} diff --git a/src/IO/WriteBufferFromHTTP.h b/src/IO/WriteBufferFromHTTP.h index 31b2a921889..6966bc8a5c5 100644 --- a/src/IO/WriteBufferFromHTTP.h +++ b/src/IO/WriteBufferFromHTTP.h @@ -21,6 +21,7 @@ public: explicit WriteBufferFromHTTP(const Poco::URI & uri, const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only const std::string & content_type = "", + const std::string & content_encoding = "", const ConnectionTimeouts & timeouts = {}, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 1226b6567f9..9cb8942daf1 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -54,6 +54,8 @@ WriteBufferFromS3::WriteBufferFromS3( const String & bucket_, const String & key_, size_t minimum_upload_part_size_, + size_t upload_part_size_multiply_factor_, + size_t upload_part_size_multiply_threshold_, size_t max_single_part_upload_size_, std::optional> object_metadata_, size_t buffer_size_, @@ -63,7 +65,9 @@ WriteBufferFromS3::WriteBufferFromS3( , key(key_) , object_metadata(std::move(object_metadata_)) , client_ptr(std::move(client_ptr_)) - , minimum_upload_part_size(minimum_upload_part_size_) + , upload_part_size(minimum_upload_part_size_) + , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) + , upload_part_size_multiply_threshold(upload_part_size_multiply_threshold_) , max_single_part_upload_size(max_single_part_upload_size_) , schedule(std::move(schedule_)) { @@ -85,9 +89,10 @@ void WriteBufferFromS3::nextImpl() if (multipart_upload_id.empty() && last_part_size > max_single_part_upload_size) createMultipartUpload(); - if (!multipart_upload_id.empty() && last_part_size > minimum_upload_part_size) + if (!multipart_upload_id.empty() && last_part_size > upload_part_size) { writePart(); + allocateBuffer(); } @@ -96,6 +101,9 @@ void WriteBufferFromS3::nextImpl() void WriteBufferFromS3::allocateBuffer() { + if (total_parts_uploaded != 0 && total_parts_uploaded % upload_part_size_multiply_threshold == 0) + upload_part_size *= upload_part_size_multiply_factor; + temporary_buffer = Aws::MakeShared("temporary buffer"); temporary_buffer->exceptions(std::ios::badbit); last_part_size = 0; @@ -246,6 +254,8 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task) } else throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + + total_parts_uploaded++; } void WriteBufferFromS3::completeMultipartUpload() diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 1eb8a771944..8b89626ee18 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -47,6 +47,8 @@ public: const String & bucket_, const String & key_, size_t minimum_upload_part_size_, + size_t upload_part_size_multiply_factor_, + size_t upload_part_size_multiply_threshold_, size_t max_single_part_upload_size_, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, @@ -85,11 +87,14 @@ private: String key; std::optional> object_metadata; std::shared_ptr client_ptr; - size_t minimum_upload_part_size; - size_t max_single_part_upload_size; + size_t upload_part_size; + const size_t upload_part_size_multiply_factor; + const size_t upload_part_size_multiply_threshold; + const size_t max_single_part_upload_size; /// Buffer to accumulate data. std::shared_ptr temporary_buffer; - size_t last_part_size; + size_t last_part_size = 0; + std::atomic total_parts_uploaded = 0; /// Upload in S3 is made in parts. 
/// We initiate upload, then upload each part and get ETag as a response, and then finalizeImpl() upload with listing all our parts. diff --git a/src/IO/tests/gtest_archive_reader_and_writer.cpp b/src/IO/tests/gtest_archive_reader_and_writer.cpp index c6b012a9914..e1864415e1b 100644 --- a/src/IO/tests/gtest_archive_reader_and_writer.cpp +++ b/src/IO/tests/gtest_archive_reader_and_writer.cpp @@ -328,14 +328,16 @@ TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist) } +#if USE_MINIZIP + namespace { const char * supported_archive_file_exts[] = { -#if USE_MINIZIP ".zip", -#endif }; } INSTANTIATE_TEST_SUITE_P(All, ArchiveReaderAndWriterTest, ::testing::ValuesIn(supported_archive_file_exts)); + +#endif diff --git a/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp index c88e9c299a8..72b4b149bd7 100644 --- a/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -45,22 +46,24 @@ namespace BlockIO InterpreterCreateRowPolicyQuery::execute() { auto & query = query_ptr->as(); - auto & access_control = getContext()->getAccessControl(); - getContext()->checkAccess(query.alter ? AccessType::ALTER_ROW_POLICY : AccessType::CREATE_ROW_POLICY); + auto required_access = getRequiredAccess(); if (!query.cluster.empty()) { query.replaceCurrentUserTag(getContext()->getUserName()); - return executeDDLQueryOnCluster(query_ptr, getContext()); + return executeDDLQueryOnCluster(query_ptr, getContext(), required_access); } assert(query.names->cluster.empty()); + auto & access_control = getContext()->getAccessControl(); + getContext()->checkAccess(required_access); + + query.replaceEmptyDatabase(getContext()->getCurrentDatabase()); + std::optional roles_from_query; if (query.roles) roles_from_query = RolesOrUsersSet{*query.roles, access_control, getContext()->getUserID()}; - query.replaceEmptyDatabase(getContext()->getCurrentDatabase()); - if (query.alter) { auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr @@ -105,4 +108,15 @@ void InterpreterCreateRowPolicyQuery::updateRowPolicyFromQuery(RowPolicy & polic updateRowPolicyFromQueryImpl(policy, query, {}, {}); } + +AccessRightsElements InterpreterCreateRowPolicyQuery::getRequiredAccess() const +{ + const auto & query = query_ptr->as(); + AccessRightsElements res; + auto access_type = (query.alter ? 
AccessType::ALTER_ROW_POLICY : AccessType::CREATE_ROW_POLICY); + for (const auto & row_policy_name : query.names->full_names) + res.emplace_back(access_type, row_policy_name.database, row_policy_name.table_name); + return res; +} + } diff --git a/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.h b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.h index 8adfe6b0855..e76cc1c165d 100644 --- a/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.h +++ b/src/Interpreters/Access/InterpreterCreateRowPolicyQuery.h @@ -6,8 +6,8 @@ namespace DB { - class ASTCreateRowPolicyQuery; +class AccessRightsElements; struct RowPolicy; class InterpreterCreateRowPolicyQuery : public IInterpreter, WithMutableContext @@ -20,6 +20,8 @@ public: static void updateRowPolicyFromQuery(RowPolicy & policy, const ASTCreateRowPolicyQuery & query); private: + AccessRightsElements getRequiredAccess() const; + ASTPtr query_ptr; }; diff --git a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp index 4d2e880561e..3437e7fe0f4 100644 --- a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp @@ -49,12 +49,37 @@ AccessRightsElements InterpreterDropAccessEntityQuery::getRequiredAccess() const AccessRightsElements res; switch (query.type) { - case AccessEntityType::USER: res.emplace_back(AccessType::DROP_USER); return res; - case AccessEntityType::ROLE: res.emplace_back(AccessType::DROP_ROLE); return res; - case AccessEntityType::SETTINGS_PROFILE: res.emplace_back(AccessType::DROP_SETTINGS_PROFILE); return res; - case AccessEntityType::ROW_POLICY: res.emplace_back(AccessType::DROP_ROW_POLICY); return res; - case AccessEntityType::QUOTA: res.emplace_back(AccessType::DROP_QUOTA); return res; - case AccessEntityType::MAX: break; + case AccessEntityType::USER: + { + res.emplace_back(AccessType::DROP_USER); + return res; + } + case AccessEntityType::ROLE: + { + res.emplace_back(AccessType::DROP_ROLE); + return res; + } + case AccessEntityType::SETTINGS_PROFILE: + { + res.emplace_back(AccessType::DROP_SETTINGS_PROFILE); + return res; + } + case AccessEntityType::ROW_POLICY: + { + if (query.row_policy_names) + { + for (const auto & row_policy_name : query.row_policy_names->full_names) + res.emplace_back(AccessType::DROP_ROW_POLICY, row_policy_name.database, row_policy_name.table_name); + } + return res; + } + case AccessEntityType::QUOTA: + { + res.emplace_back(AccessType::DROP_QUOTA); + return res; + } + case AccessEntityType::MAX: + break; } throw Exception( toString(query.type) + ": type is not supported by DROP query", ErrorCodes::NOT_IMPLEMENTED); diff --git a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.h b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.h index 0ee478e904e..ea2d127913f 100644 --- a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.h +++ b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.h @@ -6,7 +6,6 @@ namespace DB { - class AccessRightsElements; class InterpreterDropAccessEntityQuery : public IInterpreter, WithMutableContext diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 163cb57cab5..27345218e07 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -377,12 +377,48 @@ AccessRightsElements 
InterpreterShowCreateAccessEntityQuery::getRequiredAccess() AccessRightsElements res; switch (show_query.type) { - case AccessEntityType::USER: res.emplace_back(AccessType::SHOW_USERS); return res; - case AccessEntityType::ROLE: res.emplace_back(AccessType::SHOW_ROLES); return res; - case AccessEntityType::SETTINGS_PROFILE: res.emplace_back(AccessType::SHOW_SETTINGS_PROFILES); return res; - case AccessEntityType::ROW_POLICY: res.emplace_back(AccessType::SHOW_ROW_POLICIES); return res; - case AccessEntityType::QUOTA: res.emplace_back(AccessType::SHOW_QUOTAS); return res; - case AccessEntityType::MAX: break; + case AccessEntityType::USER: + { + res.emplace_back(AccessType::SHOW_USERS); + return res; + } + case AccessEntityType::ROLE: + { + res.emplace_back(AccessType::SHOW_ROLES); + return res; + } + case AccessEntityType::SETTINGS_PROFILE: + { + res.emplace_back(AccessType::SHOW_SETTINGS_PROFILES); + return res; + } + case AccessEntityType::ROW_POLICY: + { + if (show_query.row_policy_names) + { + for (const auto & row_policy_name : show_query.row_policy_names->full_names) + res.emplace_back(AccessType::SHOW_ROW_POLICIES, row_policy_name.database, row_policy_name.table_name); + } + else if (show_query.database_and_table_name) + { + if (show_query.database_and_table_name->second.empty()) + res.emplace_back(AccessType::SHOW_ROW_POLICIES, show_query.database_and_table_name->first); + else + res.emplace_back(AccessType::SHOW_ROW_POLICIES, show_query.database_and_table_name->first, show_query.database_and_table_name->second); + } + else + { + res.emplace_back(AccessType::SHOW_ROW_POLICIES); + } + return res; + } + case AccessEntityType::QUOTA: + { + res.emplace_back(AccessType::SHOW_QUOTAS); + return res; + } + case AccessEntityType::MAX: + break; } throw Exception(toString(show_query.type) + ": type is not supported by SHOW CREATE query", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index c3fd8b8024a..a2f24a79e40 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -855,12 +855,18 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl( - AggregatedDataWithoutKey & res, + AggregatedDataVariants & data_variants, size_t row_begin, size_t row_end, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena) + Arena * arena) const { + /// `data_variants` will destroy the states of aggregate functions in the destructor + data_variants.aggregator = this; + data_variants.init(AggregatedDataVariants::Type::without_key); + + AggregatedDataWithoutKey & res = data_variants.without_key; + /// Adding values for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) { @@ -1623,15 +1629,32 @@ Block Aggregator::prepareBlockAndFill( } void Aggregator::addSingleKeyToAggregateColumns( - const AggregatedDataVariants & data_variants, + AggregatedDataVariants & data_variants, MutableColumns & aggregate_columns) const { - const auto & data = data_variants.without_key; - for (size_t i = 0; i < params.aggregates_size; ++i) + auto & data = data_variants.without_key; + + size_t i = 0; + try { - auto & column_aggregate_func = assert_cast(*aggregate_columns[i]); - column_aggregate_func.getData().push_back(data + offsets_of_aggregate_states[i]); + for (i = 0; i < params.aggregates_size; ++i) + { + auto & column_aggregate_func = assert_cast(*aggregate_columns[i]); + column_aggregate_func.getData().push_back(data + 
offsets_of_aggregate_states[i]); + } } + catch (...) + { + /// Rollback + for (size_t rollback_i = 0; rollback_i < i; ++rollback_i) + { + auto & column_aggregate_func = assert_cast(*aggregate_columns[rollback_i]); + column_aggregate_func.getData().pop_back(); + } + throw; + } + + data = nullptr; } void Aggregator::addArenasToAggregateColumns( diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index c79c2c5ef64..05c9133cb35 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1138,12 +1138,12 @@ private: AggregateFunctionInstruction * aggregate_instructions, Arena * arena) const; - static void executeOnIntervalWithoutKeyImpl( - AggregatedDataWithoutKey & res, + void executeOnIntervalWithoutKeyImpl( + AggregatedDataVariants & data_variants, size_t row_begin, size_t row_end, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena); + Arena * arena) const; template void writeToTemporaryFileImpl( @@ -1307,7 +1307,7 @@ private: NestedColumnsHolder & nested_columns_holder) const; void addSingleKeyToAggregateColumns( - const AggregatedDataVariants & data_variants, + AggregatedDataVariants & data_variants, MutableColumns & aggregate_columns) const; void addArenasToAggregateColumns( diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp index 8b68ba02504..df6e8ea98f5 100644 --- a/src/Interpreters/ClusterDiscovery.cpp +++ b/src/Interpreters/ClusterDiscovery.cpp @@ -229,7 +229,7 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info) bool secure = cluster_info.current_node.secure; auto cluster = std::make_shared( - context->getSettings(), + context->getSettingsRef(), shards, /* username= */ context->getUserName(), /* password= */ "", diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0cb046b9de4..822f1dcb534 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3148,6 +3148,7 @@ ReadSettings Context::getReadSettings() const res.http_max_tries = settings.http_max_tries; res.http_retry_initial_backoff_ms = settings.http_retry_initial_backoff_ms; res.http_retry_max_backoff_ms = settings.http_retry_max_backoff_ms; + res.http_skip_not_found_url_for_globs = settings.http_skip_not_found_url_for_globs; res.mmap_cache = getMMappedFileCache().get(); diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index a81d4204565..ac8a27484d9 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -1,9 +1,9 @@ #include -#include #include -#include +#include #include +#include #include #include #include @@ -18,7 +18,14 @@ #include #include #include +#include +namespace ProfileEvents +{ +extern const Event ScalarSubqueriesGlobalCacheHit; +extern const Event ScalarSubqueriesLocalCacheHit; +extern const Event ScalarSubqueriesCacheMiss; +} namespace DB { @@ -72,40 +79,95 @@ static bool worthConvertingToLiteral(const Block & scalar) return !useless_literal_types.count(scalar_type_name); } +static auto getQueryInterpreter(const ASTSubquery & subquery, ExecuteScalarSubqueriesMatcher::Data & data) +{ + auto subquery_context = Context::createCopy(data.getContext()); + Settings subquery_settings = data.getContext()->getSettings(); + subquery_settings.max_result_rows = 1; + subquery_settings.extremes = false; + subquery_context->setSettings(subquery_settings); + if (!data.only_analyze && 
subquery_context->hasQueryContext()) + { + /// Save current cached scalars in the context before analyzing the query + /// This is specially helpful when analyzing CTE scalars + auto context = subquery_context->getQueryContext(); + for (const auto & it : data.scalars) + context->addScalar(it.first, it.second); + } + + ASTPtr subquery_select = subquery.children.at(0); + + auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1, true); + options.analyze(data.only_analyze); + + return std::make_unique(subquery_select, subquery_context, options); +} + void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { auto hash = subquery.getTreeHash(); auto scalar_query_hash_str = toString(hash.first) + "_" + toString(hash.second); + std::unique_ptr interpreter = nullptr; + bool hit = false; + bool is_local = false; + Block scalar; - if (data.getContext()->hasQueryContext() && data.getContext()->getQueryContext()->hasScalar(scalar_query_hash_str)) + if (data.local_scalars.count(scalar_query_hash_str)) { - scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str); + hit = true; + scalar = data.local_scalars[scalar_query_hash_str]; + is_local = true; + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesLocalCacheHit); } else if (data.scalars.count(scalar_query_hash_str)) { + hit = true; scalar = data.scalars[scalar_query_hash_str]; + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit); } else { - auto subquery_context = Context::createCopy(data.getContext()); - Settings subquery_settings = data.getContext()->getSettings(); - subquery_settings.max_result_rows = 1; - subquery_settings.extremes = false; - subquery_context->setSettings(subquery_settings); + if (data.getContext()->hasQueryContext() && data.getContext()->getQueryContext()->hasScalar(scalar_query_hash_str)) + { + if (!data.getContext()->getViewSource()) + { + /// We aren't using storage views so we can safely use the context cache + scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str); + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit); + hit = true; + } + else + { + /// If we are under a context that uses views that means that the cache might contain values that reference + /// the original table and not the view, so in order to be able to check the global cache we need to first + /// make sure that the query doesn't use the view + /// Note in any case the scalar will end up cached in *data* so this won't be repeated inside this context + interpreter = getQueryInterpreter(subquery, data); + if (!interpreter->usesViewSource()) + { + scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str); + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit); + hit = true; + } + } + } + } - ASTPtr subquery_select = subquery.children.at(0); + if (!hit) + { + if (!interpreter) + interpreter = getQueryInterpreter(subquery, data); - auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1, true); - options.analyze(data.only_analyze); + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesCacheMiss); + is_local = interpreter->usesViewSource(); - auto interpreter = InterpreterSelectWithUnionQuery(subquery_select, subquery_context, options); Block block; if (data.only_analyze) { /// If query is only analyzed, then constants are not correct. 
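/// (Only the column types matter here: any empty column in the sample block is padded with a single default value just below, so the rest of the code can treat it as an ordinary one-row block even though the values themselves are meaningless.)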
- block = interpreter.getSampleBlock(); + block = interpreter->getSampleBlock(); for (auto & column : block) { if (column.column->empty()) @@ -118,14 +180,14 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr } else { - auto io = interpreter.execute(); + auto io = interpreter->execute(); PullingAsyncPipelineExecutor executor(io.pipeline); while (block.rows() == 0 && executor.pull(block)); if (block.rows() == 0) { - auto types = interpreter.getSampleBlock().getDataTypes(); + auto types = interpreter->getSampleBlock().getDataTypes(); if (types.size() != 1) types = {std::make_shared(types)}; @@ -218,7 +280,10 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr ast = std::move(func); } - data.scalars[scalar_query_hash_str] = std::move(scalar); + if (is_local) + data.local_scalars[scalar_query_hash_str] = std::move(scalar); + else + data.scalars[scalar_query_hash_str] = std::move(scalar); } void ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data) diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.h b/src/Interpreters/ExecuteScalarSubqueriesVisitor.h index c230f346779..d702404dab6 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.h +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.h @@ -19,11 +19,8 @@ struct ASTTableExpression; * * Features * - * A replacement occurs during query analysis, and not during the main runtime. - * This means that the progress indicator will not work during the execution of these requests, - * and also such queries can not be aborted. - * - * But the query result can be used for the index in the table. + * A replacement occurs during query analysis, and not during the main runtime, so + * the query result can be used for the index in the table. * * Scalar subqueries are executed on the request-initializer server. * The request is sent to remote servers with already substituted constants. @@ -37,6 +34,7 @@ public: { size_t subquery_depth; Scalars & scalars; + Scalars & local_scalars; bool only_analyze; }; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index f2aa50f3c23..30c832e4917 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -159,8 +159,8 @@ static void setLazyExecutionInfo( const ActionsDAGReverseInfo::NodeInfo & node_info = reverse_info.nodes_info[reverse_info.reverse_index.at(node)]; - /// If node is used in result, we can't enable lazy execution. - if (node_info.used_in_result) + /// If node is used in result or it doesn't have parents, we can't enable lazy execution. + if (node_info.used_in_result || node_info.parents.empty()) lazy_execution_info.can_be_lazy_executed = false; /// To fill lazy execution info for current node we need to create it for all it's parents. 
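The scalar-subquery rework above splits the cache into two maps: `scalars`, whose entries are safe to publish into the shared query context, and `local_scalars`, whose entries were computed against the virtual view-source block and must not be reused outside of it. Below is a simplified, self-contained sketch of the lookup and store order the new visitor follows; the types are invented placeholders rather than the real ClickHouse ones, and the real code additionally builds the interpreter lazily to re-check usesViewSource() before trusting the query-context cache.

#include <map>
#include <optional>
#include <string>

/// Stand-in for the Block holding a scalar subquery result; keyed by the subquery AST tree hash.
struct Scalar { std::string value; };
using ScalarCache = std::map<std::string, Scalar>;

struct ScalarLookup
{
    ScalarCache & local_scalars;   /// results tied to the view-source block, never shared
    ScalarCache & scalars;         /// results that may be promoted to the query context
    ScalarCache & query_context;   /// cache shared by every interpreter of the query
    bool uses_view_source;         /// what interpreter->usesViewSource() would report

    std::optional<Scalar> find(const std::string & hash) const
    {
        if (auto it = local_scalars.find(hash); it != local_scalars.end())
            return it->second;                       /// ScalarSubqueriesLocalCacheHit
        if (auto it = scalars.find(hash); it != scalars.end())
            return it->second;                       /// ScalarSubqueriesGlobalCacheHit
        /// The shared cache may hold results computed against the original table,
        /// so it is only consulted when no view source is involved.
        if (!uses_view_source)
            if (auto it = query_context.find(hash); it != query_context.end())
                return it->second;                   /// also a global cache hit
        return std::nullopt;                         /// ScalarSubqueriesCacheMiss: run the subquery
    }

    void store(const std::string & hash, Scalar scalar)
    {
        /// Results produced against the view-source block stay local; everything else is shareable.
        (uses_view_source ? local_scalars : scalars)[hash] = std::move(scalar);
    }
};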
diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.h b/src/Interpreters/IInterpreterUnionOrSelectQuery.h index db9cc086e35..1f59dd36354 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.h +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.h @@ -40,6 +40,15 @@ public: void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const override; + /// Returns whether the query uses the view source from the Context + /// The view source is a virtual storage that currently only materialized views use to replace the source table + /// with the incoming block only + /// This flag is useful to know for how long we can cache scalars generated by this query: If it doesn't use the virtual storage + /// then we can cache the scalars forever (for any query that doesn't use the virtual storage either), but if it does use the virtual + /// storage then we can only keep the scalar result around while we are working with that source block + /// You can find more details about this under ExecuteScalarSubqueriesMatcher::visit + bool usesViewSource() { return uses_view_source; } + protected: ASTPtr query_ptr; ContextMutablePtr context; @@ -48,6 +57,7 @@ protected: size_t max_streams = 1; bool settings_limit_offset_needed = false; bool settings_limit_offset_done = false; + bool uses_view_source = false; }; } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d2b77f1a439..4e0561ad750 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -2,6 +2,7 @@ #include +#include "Common/Exception.h" #include #include #include @@ -12,6 +13,7 @@ #include #include +#include #include #include @@ -91,6 +93,7 @@ namespace ErrorCodes extern const int UNKNOWN_DATABASE; extern const int PATH_ACCESS_DENIED; extern const int NOT_IMPLEMENTED; + extern const int ENGINE_REQUIRED; } namespace fs = std::filesystem; @@ -157,6 +160,9 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", serializeAST(*create.storage)); } + if (create.storage && !create.storage->engine) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Database engine must be specified"); + if (create.storage->engine->name == "Atomic" || create.storage->engine->name == "Replicated" || create.storage->engine->name == "MaterializedPostgreSQL") @@ -581,6 +587,17 @@ ConstraintsDescription InterpreterCreateQuery::getConstraintsDescription(const A InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const { + /// Set the table engine if it was not specified explicitly. + setEngine(create); + + /// We have to check access rights again (in case engine was changed). + if (create.storage) + { + auto source_access_type = StorageFactory::instance().getSourceAccessType(create.storage->engine->name); + if (source_access_type != AccessType::NONE) + getContext()->checkAccess(source_access_type); + } + TableProperties properties; TableLockHolder as_storage_lock; @@ -645,7 +662,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } /// We can have queries like "CREATE TABLE
<table>
ENGINE=" if /// supports schema inference (will determine table structure in it's constructor). - else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) + else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) // NOLINT throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY); /// Even if query has list of columns, canonicalize it (unfold Nested columns). @@ -663,8 +680,6 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti create.columns_list->setOrReplace(create.columns_list->projections, new_projections); validateTableStructure(create, properties); - /// Set the table engine if it was not specified explicitly. - setEngine(create); assert(as_database_saved.empty() && as_table_saved.empty()); std::swap(create.as_database, as_database_saved); @@ -718,30 +733,90 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } +String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_table_engine) +{ + switch (default_table_engine) + { + case DefaultTableEngine::Log: + return "Log"; + + case DefaultTableEngine::StripeLog: + return "StripeLog"; + + case DefaultTableEngine::MergeTree: + return "MergeTree"; + + case DefaultTableEngine::ReplacingMergeTree: + return "ReplacingMergeTree"; + + case DefaultTableEngine::ReplicatedMergeTree: + return "ReplicatedMergeTree"; + + case DefaultTableEngine::ReplicatedReplacingMergeTree: + return "ReplicatedReplacingMergeTree"; + + case DefaultTableEngine::Memory: + return "Memory"; + + default: + throw Exception("default_table_engine is set to unknown value", ErrorCodes::LOGICAL_ERROR); + } +} + +void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context) +{ + if (local_context->getSettingsRef().default_table_engine.value == DefaultTableEngine::None) + throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); + + auto engine_ast = std::make_shared(); + auto default_table_engine = local_context->getSettingsRef().default_table_engine.value; + engine_ast->name = getTableEngineName(default_table_engine); + engine_ast->no_empty_args = true; + storage.set(storage.engine, engine_ast); +} + void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const { if (create.as_table_function) return; - if (create.storage || create.is_dictionary || create.isView()) - { - if (create.temporary && create.storage && create.storage->engine && create.storage->engine->name != "Memory") - throw Exception(ErrorCodes::INCORRECT_QUERY, - "Temporary tables can only be created with ENGINE = Memory, not {}", create.storage->engine->name); - + if (create.is_dictionary || create.is_ordinary_view || create.is_live_view || create.is_window_view) + return; + + if (create.is_materialized_view && create.to_table_id) return; - } if (create.temporary) { + if (create.storage && create.storage->engine && create.storage->engine->name != "Memory") + throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables can only be created with ENGINE = Memory, not {}", + create.storage->engine->name); + + /// It's possible if some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not. + /// It makes sense when default_table_engine setting is used, but not for temporary tables. 
+ /// For temporary tables we ignore this setting to allow CREATE TEMPORARY TABLE query without specifying ENGINE + /// even if setting is set to MergeTree or something like that (otherwise MergeTree will be substituted and query will fail). + if (create.storage && !create.storage->engine) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Invalid storage definition for temporary table: must be either ENGINE = Memory or empty"); + auto engine_ast = std::make_shared(); engine_ast->name = "Memory"; engine_ast->no_empty_args = true; auto storage_ast = std::make_shared(); storage_ast->set(storage_ast->engine, engine_ast); create.set(create.storage, storage_ast); + return; } - else if (!create.as_table.empty()) + + if (create.storage) + { + /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. + if (!create.storage->engine) + setDefaultTableEngine(*create.storage, getContext()); + return; + } + + if (!create.as_table.empty()) { /// NOTE Getting the structure from the table specified in the AS is done not atomically with the creation of the table. @@ -754,24 +829,16 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const const String qualified_name = backQuoteIfNeed(as_database_name) + "." + backQuoteIfNeed(as_table_name); if (as_create.is_ordinary_view) - throw Exception( - "Cannot CREATE a table AS " + qualified_name + ", it is a View", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name); if (as_create.is_live_view) - throw Exception( - "Cannot CREATE a table AS " + qualified_name + ", it is a Live View", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name); if (as_create.is_window_view) - throw Exception( - "Cannot CREATE a table AS " + qualified_name + ", it is a Window View", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Window View", qualified_name); if (as_create.is_dictionary) - throw Exception( - "Cannot CREATE a table AS " + qualified_name + ", it is a Dictionary", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Dictionary", qualified_name); if (as_create.storage) create.set(create.storage, as_create.storage->ptr()); @@ -779,7 +846,12 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const create.as_table_function = as_create.as_table_function->clone(); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set engine, it's a bug."); + + return; } + + create.set(create.storage, std::make_shared()); + setDefaultTableEngine(*create.storage, getContext()); } static void generateUUIDForTable(ASTCreateQuery & create) diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 03c4b4ae1b6..5804d817fe2 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -15,6 +16,7 @@ namespace DB class ASTCreateQuery; class ASTExpressionList; class ASTConstraintDeclaration; +class ASTStorage; class IDatabase; using DatabasePtr = std::shared_ptr; @@ -81,6 +83,8 @@ private: /// Calculate list of columns, constraints, indices, etc... of table. Rewrite query in canonical way. 
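/// Resulting behaviour, sketched for illustration (the SQL below is an example, not part of this change):
///     SET default_table_engine = 'MergeTree';
///     CREATE TABLE t (key UInt64, value String) ORDER BY key;   -- ENGINE = MergeTree is filled in
/// An explicit ENGINE clause always wins, CREATE TEMPORARY TABLE still defaults to Memory,
/// and with default_table_engine = 'None' an engine-less CREATE fails with ENGINE_REQUIRED.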
TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; + static String getTableEngineName(DefaultTableEngine default_table_engine); + static void setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context); void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 3bb78b57702..cad570ab420 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -68,7 +68,10 @@ InterpreterSelectIntersectExceptQuery::InterpreterSelectIntersectExceptQuery( nested_interpreters.resize(num_children); for (size_t i = 0; i < num_children; ++i) + { nested_interpreters[i] = buildCurrentChildInterpreter(children.at(i)); + uses_view_source |= nested_interpreters[i]->usesViewSource(); + } Blocks headers(num_children); for (size_t query_num = 0; query_num < num_children; ++query_num) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index dc00edad612..ac807f4f782 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -64,8 +64,9 @@ #include #include -#include #include +#include +#include #include #include @@ -315,6 +316,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!has_input && !storage) { storage = joined_tables.getLeftTableStorage(); + // Mark uses_view_source if the returned storage is the same as the one saved in viewSource + uses_view_source |= storage && storage == context->getViewSource(); got_storage_from_query = true; } @@ -336,6 +339,15 @@ InterpreterSelectQuery::InterpreterSelectQuery( joined_tables.reset(getSelectQuery()); joined_tables.resolveTables(); + if (auto view_source = context->getViewSource()) + { + // If we are using a virtual block view to replace a table and that table is used + // inside the JOIN then we need to update uses_view_source accordingly so we avoid propagating scalars that we can't cache + const auto & storage_values = static_cast(*view_source); + auto tmp_table_id = storage_values.getStorageID(); + for (const auto & t : joined_tables.tablesWithColumns()) + uses_view_source |= (t.table.database == tmp_table_id.database_name && t.table.table == tmp_table_id.table_name); + } if (storage && joined_tables.isLeftTableSubquery()) { @@ -351,7 +363,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( { interpreter_subquery = joined_tables.makeLeftTableSubquery(options.subquery()); if (interpreter_subquery) + { source_header = interpreter_subquery->getSampleBlock(); + uses_view_source |= interpreter_subquery->usesViewSource(); + } } joined_tables.rewriteDistributedInAndJoins(query_ptr); @@ -389,9 +404,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( query.setFinal(); /// Save scalar sub queries's results in the query context - /// But discard them if the Storage has been modified - /// In an ideal situation we would only discard the scalars affected by the storage change - if (!options.only_analyze && context->hasQueryContext() && !context->getViewSource()) + /// Note that we are only saving scalars and not local_scalars since the latter can't be safely shared across contexts + if (!options.only_analyze && context->hasQueryContext()) for (const auto & it : 
syntax_analyzer_result->getScalars()) context->getQueryContext()->addScalar(it.first, it.second); @@ -479,6 +493,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// If there is an aggregation in the outer query, WITH TOTALS is ignored in the subquery. if (query_analyzer->hasAggregation()) interpreter_subquery->ignoreWithTotals(); + uses_view_source |= interpreter_subquery->usesViewSource(); } required_columns = syntax_analyzer_result->requiredSourceColumns(); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index e4b3e62c358..723db59f04b 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -138,6 +138,9 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( nested_interpreters.emplace_back( buildCurrentChildInterpreter(ast->list_of_selects->children.at(query_num), require_full_header ? Names() : current_required_result_column_names)); + // We need to propagate the uses_view_source flag from children to the (self) parent since, if one of the children uses + // a view source that means that the parent uses it too and can be cached globally + uses_view_source |= nested_interpreters.back()->usesViewSource(); } /// Determine structure of the result. diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 5748a8a5dbf..fc3ef681c2c 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -479,10 +479,11 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, } /// Replacing scalar subqueries with constant values. -void executeScalarSubqueries(ASTPtr & query, ContextPtr context, size_t subquery_depth, Scalars & scalars, bool only_analyze) +void executeScalarSubqueries( + ASTPtr & query, ContextPtr context, size_t subquery_depth, Scalars & scalars, Scalars & local_scalars, bool only_analyze) { LogAST log; - ExecuteScalarSubqueriesVisitor::Data visitor_data{WithContext{context}, subquery_depth, scalars, only_analyze}; + ExecuteScalarSubqueriesVisitor::Data visitor_data{WithContext{context}, subquery_depth, scalars, local_scalars, only_analyze}; ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } @@ -1158,7 +1159,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates); /// Executing scalar subqueries - replacing them with constant values. - executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, select_options.only_analyze); + executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze); if (settings.legacy_column_name_of_tuple_literal) markTupleLiteralsAsLegacy(query); @@ -1248,7 +1249,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( normalize(query, result.aliases, result.source_columns_set, false, settings, allow_self_aliases); /// Executing scalar subqueries. Column defaults could be a scalar subquery. 
- executeScalarSubqueries(query, getContext(), 0, result.scalars, !execute_scalar_subqueries); + executeScalarSubqueries(query, getContext(), 0, result.scalars, result.local_scalars, !execute_scalar_subqueries); if (settings.legacy_column_name_of_tuple_literal) markTupleLiteralsAsLegacy(query); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 7692195ec4a..45b3a5a00e3 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -75,6 +75,7 @@ struct TreeRewriterResult /// Results of scalar sub queries Scalars scalars; + Scalars local_scalars; explicit TreeRewriterResult( const NamesAndTypesList & source_columns_, diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 1c5a4310f1b..3f40167b1d1 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -22,38 +22,20 @@ namespace ErrorCodes ASTPtr ASTSelectQuery::clone() const { auto res = std::make_shared(*this); + + /** NOTE Members must clone exactly in the same order in which they were inserted into `children` in ParserSelectQuery. + * This is important because the AST hash depends on the children order and this hash is used for multiple things, + * like the column identifiers in the case of subqueries in the IN statement or caching scalar queries (reused in CTEs so it's + * important for them to have the same hash). + * For distributed query processing, in case one of the servers is localhost and the other one is not, localhost query is executed + * within the process and is cloned, and the request is sent to the remote server in text form via TCP. + * And if the cloning order does not match the parsing order then different servers will get different identifiers. + * + * Since the positions map uses we can copy it as is and ensure the new children array is created / pushed + * in the same order as the existing one */ res->children.clear(); - res->positions.clear(); - -#define CLONE(expr) res->setExpression(expr, getExpression(expr, true)) - - /** NOTE Members must clone exactly in the same order, - * in which they were inserted into `children` in ParserSelectQuery. - * This is important because of the children's names the identifier (getTreeHash) is compiled, - * which can be used for column identifiers in the case of subqueries in the IN statement. - * For distributed query processing, in case one of the servers is localhost and the other one is not, - * localhost query is executed within the process and is cloned, - * and the request is sent to the remote server in text form via TCP. - * And if the cloning order does not match the parsing order, - * then different servers will get different identifiers. 
- */ - CLONE(Expression::WITH); - CLONE(Expression::SELECT); - CLONE(Expression::TABLES); - CLONE(Expression::PREWHERE); - CLONE(Expression::WHERE); - CLONE(Expression::GROUP_BY); - CLONE(Expression::HAVING); - CLONE(Expression::WINDOW); - CLONE(Expression::ORDER_BY); - CLONE(Expression::LIMIT_BY_OFFSET); - CLONE(Expression::LIMIT_BY_LENGTH); - CLONE(Expression::LIMIT_BY); - CLONE(Expression::LIMIT_OFFSET); - CLONE(Expression::LIMIT_LENGTH); - CLONE(Expression::SETTINGS); - -#undef CLONE + for (const auto & child : children) + res->children.push_back(child->clone()); return res; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 6d295a0d516..9c9989dc39f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -18,6 +18,7 @@ #include #include #include +#include namespace DB @@ -353,20 +354,26 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr ttl_table; ASTPtr settings; - if (!s_engine.ignore(pos, expected)) - return false; + bool storage_like = false; - s_eq.ignore(pos, expected); + if (s_engine.ignore(pos, expected)) + { + s_eq.ignore(pos, expected); - if (!ident_with_optional_params_p.parse(pos, engine, expected)) - return false; + if (!ident_with_optional_params_p.parse(pos, engine, expected)) + return false; + storage_like = true; + } while (true) { if (!partition_by && s_partition_by.ignore(pos, expected)) { if (expression_p.parse(pos, partition_by, expected)) + { + storage_like = true; continue; + } else return false; } @@ -374,7 +381,10 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!primary_key && s_primary_key.ignore(pos, expected)) { if (expression_p.parse(pos, primary_key, expected)) + { + storage_like = true; continue; + } else return false; } @@ -382,7 +392,10 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!order_by && s_order_by.ignore(pos, expected)) { if (expression_p.parse(pos, order_by, expected)) + { + storage_like = true; continue; + } else return false; } @@ -390,7 +403,10 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!sample_by && s_sample_by.ignore(pos, expected)) { if (expression_p.parse(pos, sample_by, expected)) + { + storage_like = true; continue; + } else return false; } @@ -398,7 +414,10 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ttl_table && s_ttl.ignore(pos, expected)) { if (parser_ttl_list.parse(pos, ttl_table, expected)) + { + storage_like = true; continue; + } else return false; } @@ -407,10 +426,14 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (!settings_p.parse(pos, settings, expected)) return false; + storage_like = true; } break; } + // If any part of storage definition is found create storage node + if (!storage_like) + return false; auto storage = std::make_shared(); storage->set(storage->engine, engine); @@ -549,13 +572,11 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!storage_parse_result && !is_temporary) { - if (!s_as.ignore(pos, expected)) + if (s_as.ignore(pos, expected) && !table_function_p.parse(pos, as_table_function, expected)) return false; - if (!table_function_p.parse(pos, as_table_function, expected)) - { - return false; - } } + + /// Will set default table engine if Storage clause was not parsed } /** Create queries without list of columns: * - CREATE|ATTACH TABLE ... AS ... 
@@ -590,10 +611,6 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe } } } - else if (!storage) - { - return false; - } } auto comment = parseComment(pos, expected); @@ -625,12 +642,14 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (comment) query->set(query->comment, comment); - if (query->storage && query->columns_list && query->columns_list->primary_key) + if (query->columns_list && query->columns_list->primary_key) { - if (query->storage->primary_key) - { + /// If engine is not set will use default one + if (!query->storage) + query->set(query->storage, std::make_shared()); + else if (query->storage->primary_key) throw Exception("Multiple primary keys are not allowed.", ErrorCodes::BAD_ARGUMENTS); - } + query->storage->primary_key = query->columns_list->primary_key; } @@ -1263,8 +1282,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (is_materialized_view && !to_table) { /// Internal ENGINE for MATERIALIZED VIEW must be specified. - if (!storage_p.parse(pos, storage, expected)) - return false; + /// Actually check it in Interpreter as default_table_engine can be set + storage_p.parse(pos, storage, expected); if (s_populate.ignore(pos, expected)) is_populate = true; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 615121eae58..c48cea9c480 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -276,7 +276,7 @@ protected: class ParserIndexDeclaration : public IParserBase { public: - ParserIndexDeclaration() {} + ParserIndexDeclaration() = default; protected: const char * getName() const override { return "index declaration"; } @@ -336,7 +336,7 @@ protected: /** - * ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...] + * [ENGINE = name] [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...] 
*/ class ParserStorage : public IParserBase { @@ -391,7 +391,7 @@ class ParserTableOverrideDeclaration : public IParserBase { public: const bool is_standalone; - ParserTableOverrideDeclaration(bool is_standalone_ = true) : is_standalone(is_standalone_) { } + explicit ParserTableOverrideDeclaration(bool is_standalone_ = true) : is_standalone(is_standalone_) { } protected: const char * getName() const override { return "table override declaration"; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 6d546a3b772..549fd7a6113 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -356,6 +356,7 @@ void registerInputFormatJSONEachRow(FormatFactory & factory) }); factory.registerFileExtension("ndjson", "JSONEachRow"); + factory.registerFileExtension("jsonl", "JSONEachRow"); factory.registerInputFormat("JSONStringsEachRow", []( ReadBuffer & buf, diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 1cd18087f56..f63d6fa9c46 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_) - : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) + : IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_), name_map(getPort().getHeader().columns()) { const auto & sample_block = getPort().getHeader(); size_t num_columns = sample_block.columns(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index b7e6f29188d..9a9a71f9688 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -921,8 +921,8 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( auto reader_settings = getMergeTreeReaderSettings(context); - bool use_skip_indexes = context->getSettings().use_skip_indexes; - if (select.final() && !context->getSettings().use_skip_indexes_if_final) + bool use_skip_indexes = settings.use_skip_indexes; + if (select.final() && !settings.use_skip_indexes_if_final) use_skip_indexes = false; result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes( diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 857f362c4be..63497ea1af4 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -121,7 +121,7 @@ void AggregatingInOrderTransform::consume(Chunk chunk) /// Add data to aggr. state if interval is not empty. Empty when haven't found current key in new block. 
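/// Passing the whole `variants` (instead of `variants.without_key`) below lets AggregatedDataVariants own the freshly created single-key aggregation state, so the state is destroyed by its destructor rather than leaked if an exception is thrown while aggregating.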
if (key_begin != key_end) - params->aggregator.executeOnIntervalWithoutKeyImpl(variants.without_key, key_begin, key_end, aggregate_function_instructions.data(), variants.aggregates_pool); + params->aggregator.executeOnIntervalWithoutKeyImpl(variants, key_begin, key_end, aggregate_function_instructions.data(), variants.aggregates_pool); current_memory_usage = getCurrentMemoryUsage() - initial_memory_usage; diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index 839ab0cac88..48a32ea8663 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -44,8 +44,8 @@ public: private: SubqueryForSet subquery; - std::unique_ptr executor; QueryPipeline table_out; + std::unique_ptr executor; UInt64 read_rows = 0; Stopwatch watch; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 17075e2b318..19302afb5c9 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -331,7 +331,7 @@ Chain buildPushingToViewsChain( { auto executing_inner_query = std::make_shared( storage_header, views_data->views.back(), views_data); - executing_inner_query->setRuntimeData(view_thread_status, elapsed_counter_ms); + executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); } @@ -381,7 +381,7 @@ Chain buildPushingToViewsChain( processors.emplace_front(std::move(copying_data)); processors.emplace_back(std::move(finalizing_views)); result_chain = Chain(std::move(processors)); - result_chain.setNumThreads(max_parallel_streams); + result_chain.setNumThreads(std::min(views_data->max_threads, max_parallel_streams)); } if (auto * live_view = dynamic_cast(storage.get())) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index 260fdfb3a19..98e7f19a37a 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -18,7 +18,7 @@ namespace DB struct ViewRuntimeData { - /// A query we should run over inserted block befire pushing into inner storage. + /// A query we should run over inserted block before pushing into inner storage. const ASTPtr query; /// This structure is expected by inner storage. Will convert query result to it. 
Block sample_block; diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp index 64c8a01bb9c..4ee3f2d4b82 100644 --- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp @@ -59,7 +59,7 @@ InputFormatPtr getInputFormatFromASTInsertQuery( : std::make_unique(); /// Create a source from input buffer using format from query - auto source = context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettings().max_insert_block_size); + auto source = context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettingsRef().max_insert_block_size); source->addBuffer(std::move(input_buffer)); return source; } diff --git a/src/QueryPipeline/RemoteInserter.cpp b/src/QueryPipeline/RemoteInserter.cpp index c34c625dc6d..13d087f0db9 100644 --- a/src/QueryPipeline/RemoteInserter.cpp +++ b/src/QueryPipeline/RemoteInserter.cpp @@ -24,7 +24,9 @@ RemoteInserter::RemoteInserter( const String & query_, const Settings & settings_, const ClientInfo & client_info_) - : connection(connection_), query(query_) + : connection(connection_) + , query(query_) + , server_revision(connection.getServerRevision(timeouts)) { ClientInfo modified_client_info = client_info_; modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; diff --git a/src/QueryPipeline/RemoteInserter.h b/src/QueryPipeline/RemoteInserter.h index 0688b555825..5b5de962cc6 100644 --- a/src/QueryPipeline/RemoteInserter.h +++ b/src/QueryPipeline/RemoteInserter.h @@ -35,12 +35,14 @@ public: ~RemoteInserter(); const Block & getHeader() const { return header; } + UInt64 getServerRevision() const { return server_revision; } private: Connection & connection; String query; Block header; bool finished = false; + UInt64 server_revision; }; } diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 8aa729b8883..eeaf5b32a92 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -51,6 +51,7 @@ using GRPCQueryInfo = clickhouse::grpc::QueryInfo; using GRPCResult = clickhouse::grpc::Result; using GRPCException = clickhouse::grpc::Exception; using GRPCProgress = clickhouse::grpc::Progress; +using GRPCObsoleteTransportCompression = clickhouse::grpc::ObsoleteTransportCompression; namespace DB { @@ -101,62 +102,6 @@ namespace }); } - grpc_compression_algorithm parseCompressionAlgorithm(const String & str) - { - if (str == "none") - return GRPC_COMPRESS_NONE; - else if (str == "deflate") - return GRPC_COMPRESS_DEFLATE; - else if (str == "gzip") - return GRPC_COMPRESS_GZIP; - else if (str == "stream_gzip") - return GRPC_COMPRESS_STREAM_GZIP; - else - throw Exception("Unknown compression algorithm: '" + str + "'", ErrorCodes::INVALID_CONFIG_PARAMETER); - } - - grpc_compression_level parseCompressionLevel(const String & str) - { - if (str == "none") - return GRPC_COMPRESS_LEVEL_NONE; - else if (str == "low") - return GRPC_COMPRESS_LEVEL_LOW; - else if (str == "medium") - return GRPC_COMPRESS_LEVEL_MED; - else if (str == "high") - return GRPC_COMPRESS_LEVEL_HIGH; - else - throw Exception("Unknown compression level: '" + str + "'", ErrorCodes::INVALID_CONFIG_PARAMETER); - } - - grpc_compression_algorithm convertCompressionAlgorithm(const ::clickhouse::grpc::CompressionAlgorithm & algorithm) - { - if (algorithm == ::clickhouse::grpc::NO_COMPRESSION) - return GRPC_COMPRESS_NONE; - else if (algorithm == ::clickhouse::grpc::DEFLATE) - return 
GRPC_COMPRESS_DEFLATE; - else if (algorithm == ::clickhouse::grpc::GZIP) - return GRPC_COMPRESS_GZIP; - else if (algorithm == ::clickhouse::grpc::STREAM_GZIP) - return GRPC_COMPRESS_STREAM_GZIP; - else - throw Exception("Unknown compression algorithm: '" + ::clickhouse::grpc::CompressionAlgorithm_Name(algorithm) + "'", ErrorCodes::INVALID_GRPC_QUERY_INFO); - } - - grpc_compression_level convertCompressionLevel(const ::clickhouse::grpc::CompressionLevel & level) - { - if (level == ::clickhouse::grpc::COMPRESSION_NONE) - return GRPC_COMPRESS_LEVEL_NONE; - else if (level == ::clickhouse::grpc::COMPRESSION_LOW) - return GRPC_COMPRESS_LEVEL_LOW; - else if (level == ::clickhouse::grpc::COMPRESSION_MEDIUM) - return GRPC_COMPRESS_LEVEL_MED; - else if (level == ::clickhouse::grpc::COMPRESSION_HIGH) - return GRPC_COMPRESS_LEVEL_HIGH; - else - throw Exception("Unknown compression level: '" + ::clickhouse::grpc::CompressionLevel_Name(level) + "'", ErrorCodes::INVALID_GRPC_QUERY_INFO); - } - /// Gets file's contents as a string, throws an exception if failed. String readFile(const String & filepath) { @@ -193,6 +138,102 @@ namespace return grpc::InsecureServerCredentials(); } + /// Transport compression makes gRPC library to compress packed Result messages before sending them through network. + struct TransportCompression + { + grpc_compression_algorithm algorithm; + grpc_compression_level level; + + /// Extracts the settings of transport compression from a query info if possible. + static std::optional fromQueryInfo(const GRPCQueryInfo & query_info) + { + TransportCompression res; + if (!query_info.transport_compression_type().empty()) + { + res.setAlgorithm(query_info.transport_compression_type(), ErrorCodes::INVALID_GRPC_QUERY_INFO); + res.setLevel(query_info.transport_compression_level(), ErrorCodes::INVALID_GRPC_QUERY_INFO); + return res; + } + + if (query_info.has_obsolete_result_compression()) + { + switch (query_info.obsolete_result_compression().algorithm()) + { + case GRPCObsoleteTransportCompression::NO_COMPRESSION: res.algorithm = GRPC_COMPRESS_NONE; break; + case GRPCObsoleteTransportCompression::DEFLATE: res.algorithm = GRPC_COMPRESS_DEFLATE; break; + case GRPCObsoleteTransportCompression::GZIP: res.algorithm = GRPC_COMPRESS_GZIP; break; + case GRPCObsoleteTransportCompression::STREAM_GZIP: res.algorithm = GRPC_COMPRESS_STREAM_GZIP; break; + default: throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "Unknown compression algorithm: {}", GRPCObsoleteTransportCompression::CompressionAlgorithm_Name(query_info.obsolete_result_compression().algorithm())); + } + + switch (query_info.obsolete_result_compression().level()) + { + case GRPCObsoleteTransportCompression::COMPRESSION_NONE: res.level = GRPC_COMPRESS_LEVEL_NONE; break; + case GRPCObsoleteTransportCompression::COMPRESSION_LOW: res.level = GRPC_COMPRESS_LEVEL_LOW; break; + case GRPCObsoleteTransportCompression::COMPRESSION_MEDIUM: res.level = GRPC_COMPRESS_LEVEL_MED; break; + case GRPCObsoleteTransportCompression::COMPRESSION_HIGH: res.level = GRPC_COMPRESS_LEVEL_HIGH; break; + default: throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "Unknown compression level: {}", GRPCObsoleteTransportCompression::CompressionLevel_Name(query_info.obsolete_result_compression().level())); + } + return res; + } + + return std::nullopt; + } + + /// Extracts the settings of transport compression from the server configuration. 
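/// For reference, with the keys read below a server config enabling transport compression would look
/// roughly like (values are just an example):
///     <grpc>
///         <transport_compression_type>gzip</transport_compression_type>
///         <transport_compression_level>1</transport_compression_level>
///     </grpc>
/// The older grpc.compression / grpc.compression_level keys are consulted only when these are absent.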
+ static TransportCompression fromConfiguration(const Poco::Util::AbstractConfiguration & config) + { + TransportCompression res; + if (config.has("grpc.transport_compression_type")) + { + res.setAlgorithm(config.getString("grpc.transport_compression_type"), ErrorCodes::INVALID_CONFIG_PARAMETER); + res.setLevel(config.getInt("grpc.transport_compression_level", 0), ErrorCodes::INVALID_CONFIG_PARAMETER); + } + else + { + res.setAlgorithm(config.getString("grpc.compression", "none"), ErrorCodes::INVALID_CONFIG_PARAMETER); + res.setLevel(config.getString("grpc.compression_level", "none"), ErrorCodes::INVALID_CONFIG_PARAMETER); + } + return res; + } + + private: + void setAlgorithm(const String & str, int error_code) + { + if (str == "none") + algorithm = GRPC_COMPRESS_NONE; + else if (str == "deflate") + algorithm = GRPC_COMPRESS_DEFLATE; + else if (str == "gzip") + algorithm = GRPC_COMPRESS_GZIP; + else if (str == "stream_gzip") + algorithm = GRPC_COMPRESS_STREAM_GZIP; + else + throw Exception(error_code, "Unknown compression algorithm: '{}'", str); + } + + void setLevel(const String & str, int error_code) + { + if (str == "none") + level = GRPC_COMPRESS_LEVEL_NONE; + else if (str == "low") + level = GRPC_COMPRESS_LEVEL_LOW; + else if (str == "medium") + level = GRPC_COMPRESS_LEVEL_MED; + else if (str == "high") + level = GRPC_COMPRESS_LEVEL_HIGH; + else + throw Exception(error_code, "Unknown compression level: '{}'", str); + } + + void setLevel(int level_, int error_code) + { + if (0 <= level_ && level_ < GRPC_COMPRESS_LEVEL_COUNT) + level = static_cast(level_); + else + throw Exception(error_code, "Compression level {} is out of range 0..{}", level_, GRPC_COMPRESS_LEVEL_COUNT - 1); + } + }; /// Gets session's timeout from query info or from the server config. 
std::chrono::steady_clock::duration getSessionTimeout(const GRPCQueryInfo & query_info, const Poco::Util::AbstractConfiguration & config) @@ -293,15 +334,10 @@ namespace return std::nullopt; } - void setResultCompression(grpc_compression_algorithm algorithm, grpc_compression_level level) + void setTransportCompression(const TransportCompression & transport_compression) { - grpc_context.set_compression_algorithm(algorithm); - grpc_context.set_compression_level(level); - } - - void setResultCompression(const ::clickhouse::grpc::Compression & compression) - { - setResultCompression(convertCompressionAlgorithm(compression.algorithm()), convertCompressionLevel(compression.level())); + grpc_context.set_compression_algorithm(transport_compression.algorithm); + grpc_context.set_compression_level(transport_compression.level); } protected: @@ -606,6 +642,9 @@ namespace void throwIfFailedToReadQueryInfo(); bool isQueryCancelled(); + void addQueryDetailsToResult(); + void addOutputFormatToResult(); + void addOutputColumnsNamesAndTypesToResult(const Block & headers); void addProgressToResult(); void addTotalsToResult(const Block & totals); void addExtremesToResult(const Block & extremes); @@ -628,10 +667,12 @@ namespace ASTInsertQuery * insert_query = nullptr; String input_format; String input_data_delimiter; + CompressionMethod input_compression_method = CompressionMethod::None; PODArray output; String output_format; - CompressionMethod compression_method = CompressionMethod::None; - int compression_level = 0; + bool send_output_columns_names_and_types = false; + CompressionMethod output_compression_method = CompressionMethod::None; + int output_compression_level = 0; uint64_t interactive_delay = 100000; bool send_exception_with_stacktrace = true; @@ -815,9 +856,9 @@ namespace if (!query_info.database().empty()) query_context->setCurrentDatabase(query_info.database()); - /// Apply compression settings for this call. - if (query_info.has_result_compression()) - responder->setResultCompression(query_info.result_compression()); + /// Apply transport compression for this call. + if (auto transport_compression = TransportCompression::fromQueryInfo(query_info)) + responder->setTransportCompression(*transport_compression); /// The interactive delay will be used to show progress. interactive_delay = settings.interactive_delay; @@ -851,9 +892,19 @@ namespace if (output_format.empty()) output_format = query_context->getDefaultFormat(); + send_output_columns_names_and_types = query_info.send_output_columns(); + /// Choose compression. 
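/// Input and output compression are now configured independently; if either new field is empty we fall back to the single obsolete compression_type field so that older clients keep working unchanged.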
- compression_method = chooseCompressionMethod("", query_info.compression_type()); - compression_level = query_info.compression_level(); + String input_compression_method_str = query_info.input_compression_type(); + if (input_compression_method_str.empty()) + input_compression_method_str = query_info.obsolete_compression_type(); + input_compression_method = chooseCompressionMethod("", input_compression_method_str); + + String output_compression_method_str = query_info.output_compression_type(); + if (output_compression_method_str.empty()) + output_compression_method_str = query_info.obsolete_compression_type(); + output_compression_method = chooseCompressionMethod("", output_compression_method_str); + output_compression_level = query_info.output_compression_level(); /// Set callback to create and fill external tables query_context->setExternalTablesInitializer([this] (ContextPtr context) @@ -984,7 +1035,7 @@ namespace return {nullptr, 0}; /// no more input data }); - read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), compression_method); + read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), input_compression_method); assert(!pipeline); auto source = query_context->getInputFormat( @@ -1105,6 +1156,9 @@ namespace void Call::generateOutput() { + /// We add query_id and time_zone to the first result anyway. + addQueryDetailsToResult(); + if (!io.pipeline.initialized() || io.pipeline.pushing()) return; @@ -1112,13 +1166,13 @@ namespace if (io.pipeline.pulling()) header = io.pipeline.getHeader(); - if (compression_method != CompressionMethod::None) + if (output_compression_method != CompressionMethod::None) output.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. write_buffer = std::make_unique>>(output); nested_write_buffer = static_cast> *>(write_buffer.get()); - if (compression_method != CompressionMethod::None) + if (output_compression_method != CompressionMethod::None) { - write_buffer = wrapWriteBufferWithCompressionMethod(std::move(write_buffer), compression_method, compression_level); + write_buffer = wrapWriteBufferWithCompressionMethod(std::move(write_buffer), output_compression_method, output_compression_level); compressing_write_buffer = write_buffer.get(); } @@ -1144,6 +1198,9 @@ namespace return true; }; + addOutputFormatToResult(); + addOutputColumnsNamesAndTypesToResult(header); + Block block; while (check_for_cancel()) { @@ -1394,6 +1451,29 @@ namespace return false; } + void Call::addQueryDetailsToResult() + { + *result.mutable_query_id() = query_context->getClientInfo().current_query_id; + *result.mutable_time_zone() = DateLUT::instance().getTimeZone(); + } + + void Call::addOutputFormatToResult() + { + *result.mutable_output_format() = output_format; + } + + void Call::addOutputColumnsNamesAndTypesToResult(const Block & header) + { + if (!send_output_columns_names_and_types) + return; + for (const auto & column : header) + { + auto & name_and_type = *result.add_output_columns(); + *name_and_type.mutable_name() = column.name; + *name_and_type.mutable_type() = column.type->getName(); + } + } + void Call::addProgressToResult() { auto values = progress.fetchAndResetPiecewiseAtomically(); @@ -1414,10 +1494,10 @@ namespace return; PODArray memory; - if (compression_method != CompressionMethod::None) + if (output_compression_method != CompressionMethod::None) memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. 
std::unique_ptr buf = std::make_unique>>(memory); - buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level); + buf = wrapWriteBufferWithCompressionMethod(std::move(buf), output_compression_method, output_compression_level); auto format = query_context->getOutputFormat(output_format, *buf, totals); format->write(materializeBlock(totals)); format->finalize(); @@ -1432,10 +1512,10 @@ namespace return; PODArray memory; - if (compression_method != CompressionMethod::None) + if (output_compression_method != CompressionMethod::None) memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. std::unique_ptr buf = std::make_unique>>(memory); - buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level); + buf = wrapWriteBufferWithCompressionMethod(std::move(buf), output_compression_method, output_compression_level); auto format = query_context->getOutputFormat(output_format, *buf, extremes); format->write(materializeBlock(extremes)); format->finalize(); @@ -1772,8 +1852,9 @@ void GRPCServer::start() builder.RegisterService(&grpc_service); builder.SetMaxSendMessageSize(iserver.config().getInt("grpc.max_send_message_size", -1)); builder.SetMaxReceiveMessageSize(iserver.config().getInt("grpc.max_receive_message_size", -1)); - builder.SetDefaultCompressionAlgorithm(parseCompressionAlgorithm(iserver.config().getString("grpc.compression", "none"))); - builder.SetDefaultCompressionLevel(parseCompressionLevel(iserver.config().getString("grpc.compression_level", "none"))); + auto default_transport_compression = TransportCompression::fromConfiguration(iserver.config()); + builder.SetDefaultCompressionAlgorithm(default_transport_compression.algorithm); + builder.SetDefaultCompressionLevel(default_transport_compression.level); queue = builder.AddCompletionQueue(); grpc_server = builder.BuildAndStart(); diff --git a/src/Server/HTTP/HTTPServerConnection.cpp b/src/Server/HTTP/HTTPServerConnection.cpp index 7020b8e9a23..e365c9f31d0 100644 --- a/src/Server/HTTP/HTTPServerConnection.cpp +++ b/src/Server/HTTP/HTTPServerConnection.cpp @@ -36,7 +36,7 @@ void HTTPServerConnection::run() if (request.isSecure()) { - size_t hsts_max_age = context->getSettings().hsts_max_age.value; + size_t hsts_max_age = context->getSettingsRef().hsts_max_age.value; if (hsts_max_age > 0) response.add("Strict-Transport-Security", "max-age=" + std::to_string(hsts_max_age)); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 668017f8ef8..99523ff09e3 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -359,6 +359,7 @@ void TCPHandler::runImpl() return true; sendProgress(); + sendProfileEvents(); sendLogs(); return false; diff --git a/src/Server/grpc_protos/clickhouse_grpc.proto b/src/Server/grpc_protos/clickhouse_grpc.proto index c86c74535c5..4593cfff096 100644 --- a/src/Server/grpc_protos/clickhouse_grpc.proto +++ b/src/Server/grpc_protos/clickhouse_grpc.proto @@ -45,21 +45,19 @@ message ExternalTable { map settings = 5; } -enum CompressionAlgorithm { - NO_COMPRESSION = 0; - DEFLATE = 1; - GZIP = 2; - STREAM_GZIP = 3; -} - -enum CompressionLevel { - COMPRESSION_NONE = 0; - COMPRESSION_LOW = 1; - COMPRESSION_MEDIUM = 2; - COMPRESSION_HIGH = 3; -} - -message Compression { +message ObsoleteTransportCompression { + enum CompressionAlgorithm { + NO_COMPRESSION = 0; + DEFLATE = 1; + GZIP = 2; + STREAM_GZIP = 3; + } + enum CompressionLevel { + COMPRESSION_NONE = 0; + COMPRESSION_LOW = 1; + 
COMPRESSION_MEDIUM = 2; + COMPRESSION_HIGH = 3; + } CompressionAlgorithm algorithm = 1; CompressionLevel level = 2; } @@ -84,6 +82,9 @@ message QueryInfo { // Default output format. If not specified, 'TabSeparated' is used. string output_format = 7; + // Set it if you want the names and the types of output columns to be sent to the client. + bool send_output_columns = 24; + repeated ExternalTable external_tables = 8; string user_name = 9; @@ -102,16 +103,16 @@ message QueryInfo { // `next_query_info` is allowed to be set only if a method with streaming input (i.e. ExecuteQueryWithStreamInput() or ExecuteQueryWithStreamIO()) is used. bool next_query_info = 16; - /// Controls how a ClickHouse server will compress query execution results before sending back to the client. - /// If not set the compression settings from the configuration file will be used. - Compression result_compression = 17; - - // Compression type for `input_data`, `output_data`, `totals` and `extremes`. + // Compression type for `input_data`. // Supported compression types: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2. - // When used for `input_data` the client is responsible to compress data before putting it into `input_data`. - // When used for `output_data` or `totals` or `extremes` the client receives compressed data and should decompress it by itself. - // In the latter case consider to specify also `compression_level`. - string compression_type = 18; + // The client is responsible to compress data before putting it into `input_data`. + string input_compression_type = 20; + + // Compression type for `output_data`, `totals` and `extremes`. + // Supported compression types: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2. + // The client receives compressed data and should decompress it by itself. + // Consider also setting `output_compression_level`. + string output_compression_type = 21; // Compression level. // WARNING: If it's not specified the compression level is set to zero by default which might be not the best choice for some compression types (see below). @@ -123,7 +124,23 @@ message QueryInfo { // zstd: 1..22; 3 is recommended by default (compression level 0 also means 3) // lz4: 0..16; values < 0 mean fast acceleration // bz2: 1..9 - int32 compression_level = 19; + int32 output_compression_level = 19; + + // Transport compression is an alternative way to make the server to compress its response. + // This kind of compression implies that instead of compressing just `output` the server will compress whole packed messages of the `Result` type, + // and then gRPC implementation on client side will decompress those messages so client code won't be bothered with decompression. + // Here is a big difference between the transport compression and the compression enabled by setting `output_compression_type` because + // in case of the transport compression the client code receives already decompressed data in `output`. + // If the transport compression is not set here it can still be enabled by the server configuration. + // Supported compression types: none, deflate, gzip, stream_gzip + // Supported compression levels: 0..3 + // WARNING: Don't set `transport_compression` and `output_compression` at the same time because it will make the server to compress its output twice! + string transport_compression_type = 22; + int32 transport_compression_level = 23; + + /// Obsolete fields, should not be used in new code. 
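Continuing the client-side sketch shown earlier for GRPCServer.cpp, the new QueryInfo fields separate three concerns that used to share a single compression_type: how the client compressed `input_data`, how the server should compress `output`/`totals`/`extremes`, and whether gRPC should compress whole Result messages. An illustrative fragment using the usual protoc-generated setters (not the only valid combination):

clickhouse::grpc::QueryInfo query_info;
query_info.set_query("SELECT * FROM system.numbers LIMIT 1000000");

/// Data-level compression of the result: the client must decompress `output` itself.
query_info.set_output_compression_type("zstd");
query_info.set_output_compression_level(3);

/// For uploads the client compresses `input_data` itself and only declares the codec:
///     query_info.set_input_compression_type("gzip");

/// Transport-level compression: whole Result messages are compressed and the client's gRPC
/// library decompresses them transparently. Do not combine with output_compression_type,
/// otherwise the payload is compressed twice:
///     query_info.set_transport_compression_type("gzip");
///     query_info.set_transport_compression_level(1);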
+ ObsoleteTransportCompression obsolete_result_compression = 17; + string obsolete_compression_type = 18; } enum LogsLevel { @@ -173,7 +190,17 @@ message Exception { // Result of execution of a query which is sent back by the ClickHouse server to the client. message Result { - // Output of the query, represented in the `output_format` or in a format specified in `query`. + string query_id = 9; + string time_zone = 10; + + // The format in which `output`, `totals` and `extremes` are written. + // It's either the same as `output_format` specified in `QueryInfo` or the format specified in the query itself. + string output_format = 11; + + // The names and types of columns of the result written in `output`. + repeated NameAndType output_columns = 12; + + // Output of the query, represented in the `output_format`. bytes output = 1; bytes totals = 2; bytes extremes = 3; diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 0c41cf71386..d7422b1ddbc 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -132,6 +132,7 @@ namespace struct DistributedHeader { + UInt64 revision = 0; Settings insert_settings; std::string insert_query; ClientInfo client_info; @@ -166,9 +167,8 @@ namespace /// Read the parts of the header. ReadBufferFromString header_buf(header_data); - UInt64 initiator_revision; - readVarUInt(initiator_revision, header_buf); - if (DBMS_TCP_PROTOCOL_VERSION < initiator_revision) + readVarUInt(distributed_header.revision, header_buf); + if (DBMS_TCP_PROTOCOL_VERSION < distributed_header.revision) { LOG_WARNING(log, "ClickHouse shard version is older than ClickHouse initiator version. It may lack support for new features."); } @@ -177,7 +177,7 @@ namespace distributed_header.insert_settings.read(header_buf); if (header_buf.hasPendingData()) - distributed_header.client_info.read(header_buf, initiator_revision); + distributed_header.client_info.read(header_buf, distributed_header.revision); if (header_buf.hasPendingData()) { @@ -188,10 +188,12 @@ namespace if (header_buf.hasPendingData()) { - NativeReader header_block_in(header_buf, DBMS_TCP_PROTOCOL_VERSION); + NativeReader header_block_in(header_buf, distributed_header.revision); distributed_header.block_header = header_block_in.read(); if (!distributed_header.block_header) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read header from the {} batch", in.getFileName()); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read header from the {} batch. 
Data was written with protocol version {}, current version: {}", + in.getFileName(), distributed_header.revision, DBMS_TCP_PROTOCOL_VERSION); } /// Add handling new data here, for example: @@ -264,10 +266,10 @@ namespace return nullptr; } - void writeAndConvert(RemoteInserter & remote, ReadBufferFromFile & in) + void writeAndConvert(RemoteInserter & remote, const DistributedHeader & distributed_header, ReadBufferFromFile & in) { CompressedReadBuffer decompressing_in(in); - NativeReader block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION); + NativeReader block_in(decompressing_in, distributed_header.revision); while (Block block = block_in.read()) { @@ -304,7 +306,7 @@ namespace { LOG_TRACE(log, "Processing batch {} with old format (no header)", in.getFileName()); - writeAndConvert(remote, in); + writeAndConvert(remote, distributed_header, in); return; } @@ -314,14 +316,20 @@ namespace "Structure does not match (remote: {}, local: {}), implicit conversion will be done", remote.getHeader().dumpStructure(), distributed_header.block_header.dumpStructure()); - writeAndConvert(remote, in); + writeAndConvert(remote, distributed_header, in); return; } /// If connection does not use compression, we have to uncompress the data. if (!compression_expected) { - writeAndConvert(remote, in); + writeAndConvert(remote, distributed_header, in); + return; + } + + if (distributed_header.revision != remote.getServerRevision()) + { + writeAndConvert(remote, distributed_header, in); return; } @@ -915,10 +923,10 @@ public: { in = std::make_unique(file_name); decompressing_in = std::make_unique(*in); - block_in = std::make_unique(*decompressing_in, DBMS_TCP_PROTOCOL_VERSION); log = &Poco::Logger::get("DirectoryMonitorSource"); - readDistributedHeader(*in, log); + auto distributed_header = readDistributedHeader(*in, log); + block_in = std::make_unique(*decompressing_in, distributed_header.revision); first_block = block_in->read(); } @@ -1040,7 +1048,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map LOG_DEBUG(log, "Processing batch {} with old format (no header/rows)", in.getFileName()); CompressedReadBuffer decompressing_in(in); - NativeReader block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION); + NativeReader block_in(decompressing_in, distributed_header.revision); while (Block block = block_in.read()) { diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index d40838ad141..7b07e929c76 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -372,44 +372,47 @@ String HDFSSource::getName() const Chunk HDFSSource::generate() { - if (!reader) - return {}; - - Chunk chunk; - if (reader->pull(chunk)) + while (true) { - Columns columns = chunk.getColumns(); - UInt64 num_rows = chunk.getNumRows(); + if (!reader || isCancelled()) + break; - /// Enrich with virtual columns. - if (need_path_column) + Chunk chunk; + if (reader->pull(chunk)) { - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); - columns.push_back(column->convertToFullColumnIfConst()); + Columns columns = chunk.getColumns(); + UInt64 num_rows = chunk.getNumRows(); + + /// Enrich with virtual columns. 
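Returning to the DirectoryMonitor.cpp hunks above: with the writer's revision now kept in DistributedHeader, a queued batch is decoded and re-sent through RemoteInserter (writeAndConvert) in four cases, and forwarded without re-encoding only when none of them applies. In outline (a summary sketch, not the literal code; the structure check is simplified):

bool mustConvertBeforeSending(const DistributedHeader & header, const RemoteInserter & remote, bool compression_expected)
{
    if (!header.block_header)                                                 /// old format, no header block in the file
        return true;
    if (!blocksHaveEqualStructure(remote.getHeader(), header.block_header))   /// structure differs, implicit conversion needed
        return true;
    if (!compression_expected)                                                /// the connection expects uncompressed data
        return true;
    if (header.revision != remote.getServerRevision())                        /// written with a different protocol revision
        return true;
    return false;                                                             /// safe to forward the batch as-is
}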
+ if (need_path_column) + { + auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); + columns.push_back(column->convertToFullColumnIfConst()); + } + + if (need_file_column) + { + size_t last_slash_pos = current_path.find_last_of('/'); + auto file_name = current_path.substr(last_slash_pos + 1); + + auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); + columns.push_back(column->convertToFullColumnIfConst()); + } + + return Chunk(std::move(columns), num_rows); } - if (need_file_column) { - size_t last_slash_pos = current_path.find_last_of('/'); - auto file_name = current_path.substr(last_slash_pos + 1); + std::lock_guard lock(reader_mutex); + reader.reset(); + pipeline.reset(); + read_buf.reset(); - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); - columns.push_back(column->convertToFullColumnIfConst()); + if (!initialize()) + break; } - - return Chunk(std::move(columns), num_rows); } - - { - std::lock_guard lock(reader_mutex); - reader.reset(); - pipeline.reset(); - read_buf.reset(); - - if (!initialize()) - return {}; - } - return generate(); + return {}; } diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index dfe1ea6ffd3..7b370b7e63f 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -69,7 +69,7 @@ Pipe StorageHDFSCluster::read( size_t /*max_block_size*/, unsigned /*num_streams*/) { - auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettings()); + auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); auto iterator = std::make_shared(context, uri); auto callback = std::make_shared([iterator]() mutable -> String diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 8ce65211e3e..323b59e2902 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1292,8 +1292,8 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context, key_expr_type_not_null = key_expr_type; bool cast_not_needed = is_set_const /// Set args are already casted inside Set::createFromAST - || ((isNativeNumber(key_expr_type_not_null) || isDateTime(key_expr_type_not_null)) - && (isNativeNumber(const_type) || isDateTime(const_type))); /// Numbers and DateTime are accurately compared without cast. + || ((isNativeInteger(key_expr_type_not_null) || isDateTime(key_expr_type_not_null)) + && (isNativeInteger(const_type) || isDateTime(const_type))); /// Native integers and DateTime are accurately compared without cast. 
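The narrowing from isNativeNumber to isNativeInteger reflects that a Float64 constant cannot represent every large integer key value exactly, so comparing through an implicit conversion to double can flip the result near the precision limit, which is presumably why floating-point types no longer skip the CAST. A standalone illustration (unrelated to the surrounding code):

#include <cstdint>
#include <iostream>

int main()
{
    uint64_t key = 10000000000000001ULL;   /// 10^16 + 1 is not representable as a double
    double constant = 1e16;

    std::cout << (key > static_cast<uint64_t>(constant)) << '\n';   /// exact integer comparison: prints 1
    std::cout << (static_cast<double>(key) > constant) << '\n';     /// in double, key rounds to 1e16: prints 0
}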
if (!cast_not_needed && !key_expr_type_not_null->equals(*const_type)) { diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 5e8f2a9e132..a42ea4ceff6 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -1,5 +1,7 @@ #include "PartitionedSink.h" +#include + #include #include @@ -40,19 +42,18 @@ PartitionedSink::PartitionedSink( } -SinkPtr PartitionedSink::getSinkForPartition(const String & partition_id) +SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) { - auto it = sinks.find(partition_id); - if (it == sinks.end()) + auto it = partition_id_to_sink.find(partition_key); + if (it == partition_id_to_sink.end()) { - auto sink = createSinkForPartition(partition_id); - std::tie(it, std::ignore) = sinks.emplace(partition_id, sink); + auto sink = createSinkForPartition(partition_key.toString()); + std::tie(it, std::ignore) = partition_id_to_sink.emplace(partition_key, sink); } return it->second; } - void PartitionedSink::consume(Chunk chunk) { const auto & columns = chunk.getColumns(); @@ -61,45 +62,59 @@ void PartitionedSink::consume(Chunk chunk) block_with_partition_by_expr.setColumns(columns); partition_by_expr->execute(block_with_partition_by_expr); - const auto * column = block_with_partition_by_expr.getByName(partition_by_column_name).column.get(); + const auto * partition_by_result_column = block_with_partition_by_expr.getByName(partition_by_column_name).column.get(); - std::unordered_map sub_chunks_indices; - IColumn::Selector selector; - for (size_t row = 0; row < chunk.getNumRows(); ++row) + size_t chunk_rows = chunk.getNumRows(); + chunk_row_index_to_partition_index.resize(chunk_rows); + + partition_id_to_chunk_index.clear(); + + for (size_t row = 0; row < chunk_rows; ++row) { - auto value = column->getDataAt(row); - auto [it, inserted] = sub_chunks_indices.emplace(value, sub_chunks_indices.size()); - selector.push_back(it->second); + auto partition_key = partition_by_result_column->getDataAt(row); + auto [it, inserted] = partition_id_to_chunk_index.insert(makePairNoInit(partition_key, partition_id_to_chunk_index.size())); + if (inserted) + it->value.first = copyStringInArena(partition_keys_arena, partition_key); + + chunk_row_index_to_partition_index[row] = it->getMapped(); } - Chunks sub_chunks; - sub_chunks.reserve(sub_chunks_indices.size()); - for (size_t column_index = 0; column_index < columns.size(); ++column_index) + size_t columns_size = columns.size(); + size_t partitions_size = partition_id_to_chunk_index.size(); + + Chunks partition_index_to_chunk; + partition_index_to_chunk.reserve(partitions_size); + + for (size_t column_index = 0; column_index < columns_size; ++column_index) { - MutableColumns column_sub_chunks = columns[column_index]->scatter(sub_chunks_indices.size(), selector); - if (column_index == 0) /// Set sizes for sub-chunks. 
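One detail of the consume() rewrite above: getDataAt() returns a StringRef pointing into the chunk's own column memory, which goes away once the chunk has been scattered, so each new key is copied into partition_keys_arena before being stored in partition_id_to_chunk_index and reused as a key of partition_id_to_sink across calls. The same idea in portable, standalone form (an analogy, not ClickHouse code):

#include <deque>
#include <iostream>
#include <string>
#include <string_view>
#include <unordered_map>

int main()
{
    std::deque<std::string> arena;                        /// stands in for partition_keys_arena
    std::unordered_map<std::string_view, size_t> index;   /// stands in for partition_id_to_chunk_index

    auto intern = [&](std::string_view key)
    {
        auto it = index.find(key);
        if (it != index.end())
            return it->second;
        arena.emplace_back(key);   /// copy the bytes: the buffer behind `key` may not outlive this call
        return index.emplace(arena.back(), index.size()).first->second;
    };

    std::cout << intern("2022-01-01") << ' ' << intern("2022-01-02") << ' ' << intern("2022-01-01") << '\n';   /// prints: 0 1 0
}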
+ MutableColumns partition_index_to_column_split = columns[column_index]->scatter(partitions_size, chunk_row_index_to_partition_index); + + /// Add chunks into partition_index_to_chunk with sizes of result columns + if (column_index == 0) { - for (const auto & column_sub_chunk : column_sub_chunks) + for (const auto & partition_column : partition_index_to_column_split) { - sub_chunks.emplace_back(Columns(), column_sub_chunk->size()); + partition_index_to_chunk.emplace_back(Columns(), partition_column->size()); } } - for (size_t sub_chunk_index = 0; sub_chunk_index < column_sub_chunks.size(); ++sub_chunk_index) + + for (size_t partition_index = 0; partition_index < partitions_size; ++partition_index) { - sub_chunks[sub_chunk_index].addColumn(std::move(column_sub_chunks[sub_chunk_index])); + partition_index_to_chunk[partition_index].addColumn(std::move(partition_index_to_column_split[partition_index])); } } - for (const auto & [partition_id, sub_chunk_index] : sub_chunks_indices) + for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { - getSinkForPartition(partition_id)->consume(std::move(sub_chunks[sub_chunk_index])); + auto sink = getSinkForPartitionKey(partition_key); + sink->consume(std::move(partition_index_to_chunk[partition_index])); } } void PartitionedSink::onFinish() { - for (auto & [partition_id, sink] : sinks) + for (auto & [_, sink] : partition_id_to_sink) { sink->onFinish(); } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index bc59a603fac..7ed29f1b197 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -1,5 +1,8 @@ #pragma once +#include +#include +#include #include #include #include @@ -34,9 +37,13 @@ private: ExpressionActionsPtr partition_by_expr; String partition_by_column_name; - std::unordered_map sinks; + absl::flat_hash_map partition_id_to_sink; + HashMapWithSavedHash partition_id_to_chunk_index; + IColumn::Selector chunk_row_index_to_partition_index; + Arena partition_keys_arena; + + SinkPtr getSinkForPartitionKey(StringRef partition_key); - SinkPtr getSinkForPartition(const String & partition_id); }; } diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index eae46220c86..c2f6fb1608d 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -66,6 +66,7 @@ StoragePtr StorageFactory::get( bool has_force_restore_data_flag) const { String name, comment; + ASTStorage * storage_def = query.storage; bool has_engine_args = false; @@ -107,7 +108,10 @@ StoragePtr StorageFactory::get( } else { - if (!storage_def) + if (!query.storage) + throw Exception("Incorrect CREATE query: storage required", ErrorCodes::INCORRECT_QUERY); + + if (!storage_def->engine) throw Exception("Incorrect CREATE query: ENGINE required", ErrorCodes::ENGINE_REQUIRED); const ASTFunction & engine_def = *storage_def->engine; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index edd5e0447d5..9a2ec0789cd 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -199,18 +199,27 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user fs_table_path = user_files_absolute_path / fs_table_path; Strings paths; + /// Do not use fs::canonical or fs::weakly_canonical. /// Otherwise it will not allow to work with symlinks in `user_files_path` directory. String path = fs::absolute(fs_table_path).lexically_normal(); /// Normalize path. 
- if (path.find_first_of("*?{") == std::string::npos) + + if (path.find(PartitionedSink::PARTITION_ID_WILDCARD) != std::string::npos) + { + paths.push_back(path); + } + else if (path.find_first_of("*?{") == std::string::npos) { std::error_code error; if (fs::exists(path)) total_bytes_to_read += fs::file_size(path, error); + paths.push_back(path); } else + { paths = listFilesWithRegexpMatching("/", path, total_bytes_to_read); + } for (const auto & cur_path : paths) checkCreationIsAllowed(context, user_files_absolute_path, cur_path); @@ -313,7 +322,11 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us is_db_table = false; paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); is_path_with_globs = paths.size() > 1; - path_for_partitioned_write = table_path_; + if (!paths.empty()) + path_for_partitioned_write = paths.front(); + else + path_for_partitioned_write = table_path_; + setStorageMetadata(args); } @@ -853,6 +866,7 @@ SinkToStoragePtr StorageFile::write( { if (path_for_partitioned_write.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty path for partitioned write"); + fs::create_directories(fs::path(path_for_partitioned_write).parent_path()); return std::make_shared( @@ -879,9 +893,10 @@ SinkToStoragePtr StorageFile::write( path = paths.back(); fs::create_directories(fs::path(path).parent_path()); + std::error_code error_code; if (!context->getSettingsRef().engine_file_truncate_on_insert && !is_path_with_globs && !FormatFactory::instance().checkIfFormatSupportAppend(format_name, context, format_settings) && fs::exists(paths.back()) - && fs::file_size(paths.back()) != 0) + && fs::file_size(paths.back(), error_code) != 0 && !error_code) { if (context->getSettingsRef().engine_file_allow_create_multiple_files) { diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 5f0bd240f64..5ba1514877a 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -732,8 +732,21 @@ void StorageLog::rename(const String & new_path_to_table_data, const StorageID & renameInMemory(new_table_id); } -void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) +static std::chrono::seconds getLockTimeout(ContextPtr context) { + const Settings & settings = context->getSettingsRef(); + Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds(); + if (settings.max_execution_time.totalSeconds() != 0 && settings.max_execution_time.totalSeconds() < lock_timeout) + lock_timeout = settings.max_execution_time.totalSeconds(); + return std::chrono::seconds{lock_timeout}; +} + +void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr context, TableExclusiveLockHolder &) +{ + WriteLock lock{rwlock, getLockTimeout(context)}; + if (!lock) + throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + disk->clearDirectory(table_path); for (auto & data_file : data_files) @@ -750,16 +763,6 @@ void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr } -static std::chrono::seconds getLockTimeout(ContextPtr context) -{ - const Settings & settings = context->getSettingsRef(); - Int64 lock_timeout = settings.lock_acquire_timeout.totalSeconds(); - if (settings.max_execution_time.totalSeconds() != 0 && settings.max_execution_time.totalSeconds() < lock_timeout) - lock_timeout = settings.max_execution_time.totalSeconds(); - return std::chrono::seconds{lock_timeout}; -} - - Pipe StorageLog::read( const Names & 
column_names, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 77d4952291c..9a85644d825 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -302,40 +302,42 @@ String StorageS3Source::getName() const Chunk StorageS3Source::generate() { - if (!reader) - return {}; - - Chunk chunk; - if (reader->pull(chunk)) + while (true) { - UInt64 num_rows = chunk.getNumRows(); + if (!reader || isCancelled()) + break; - if (with_path_column) - chunk.addColumn(DataTypeLowCardinality{std::make_shared()} - .createColumnConst(num_rows, file_path) - ->convertToFullColumnIfConst()); - if (with_file_column) + Chunk chunk; + if (reader->pull(chunk)) { - size_t last_slash_pos = file_path.find_last_of('/'); - chunk.addColumn(DataTypeLowCardinality{std::make_shared()} - .createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)) - ->convertToFullColumnIfConst()); + UInt64 num_rows = chunk.getNumRows(); + + if (with_path_column) + chunk.addColumn(DataTypeLowCardinality{std::make_shared()} + .createColumnConst(num_rows, file_path) + ->convertToFullColumnIfConst()); + if (with_file_column) + { + size_t last_slash_pos = file_path.find_last_of('/'); + chunk.addColumn(DataTypeLowCardinality{std::make_shared()} + .createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)) + ->convertToFullColumnIfConst()); + } + + return chunk; } - return chunk; + { + std::lock_guard lock(reader_mutex); + reader.reset(); + pipeline.reset(); + read_buf.reset(); + + if (!initialize()) + break; + } } - - { - std::lock_guard lock(reader_mutex); - reader.reset(); - pipeline.reset(); - read_buf.reset(); - - if (!initialize()) - return {}; - } - - return generate(); + return {}; } static bool checkIfObjectExists(const std::shared_ptr & client, const String & bucket, const String & key) @@ -385,13 +387,18 @@ public: const String & bucket, const String & key, size_t min_upload_part_size, + size_t upload_part_size_multiply_factor, + size_t upload_part_size_multiply_parts_count_threshold, size_t max_single_part_upload_size) : SinkToStorage(sample_block_) , sample_block(sample_block_) , format_settings(format_settings_) { write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(client, bucket, key, min_upload_part_size, max_single_part_upload_size), compression_method, 3); + std::make_unique( + client, bucket, key, min_upload_part_size, + upload_part_size_multiply_factor, upload_part_size_multiply_parts_count_threshold, + max_single_part_upload_size), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, {}, format_settings); } @@ -440,6 +447,8 @@ public: const String & bucket_, const String & key_, size_t min_upload_part_size_, + size_t upload_part_size_multiply_factor_, + size_t upload_part_size_multiply_parts_count_threshold_, size_t max_single_part_upload_size_) : PartitionedSink(partition_by, context_, sample_block_) , format(format_) @@ -450,6 +459,8 @@ public: , bucket(bucket_) , key(key_) , min_upload_part_size(min_upload_part_size_) + , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) + , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) , max_single_part_upload_size(max_single_part_upload_size_) , format_settings(format_settings_) { @@ -473,6 +484,8 @@ public: partition_bucket, partition_key, min_upload_part_size, + upload_part_size_multiply_factor, + 
upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size ); } @@ -487,6 +500,8 @@ private: const String bucket; const String key; size_t min_upload_part_size; + size_t upload_part_size_multiply_factor; + size_t upload_part_size_multiply_parts_count_threshold; size_t max_single_part_upload_size; std::optional format_settings; @@ -527,6 +542,8 @@ StorageS3::StorageS3( const String & format_name_, UInt64 max_single_read_retries_, UInt64 min_upload_part_size_, + UInt64 upload_part_size_multiply_factor_, + UInt64 upload_part_size_multiply_parts_count_threshold_, UInt64 max_single_part_upload_size_, UInt64 max_connections_, const ColumnsDescription & columns_, @@ -543,6 +560,8 @@ StorageS3::StorageS3( , format_name(format_name_) , max_single_read_retries(max_single_read_retries_) , min_upload_part_size(min_upload_part_size_) + , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) + , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) , max_single_part_upload_size(max_single_part_upload_size_) , compression_method(compression_method_) , name(uri_.storage_name) @@ -669,6 +688,8 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr client_auth.uri.bucket, keys.back(), min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size); } else @@ -712,6 +733,8 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr client_auth.uri.bucket, keys.back(), min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size); } } @@ -923,7 +946,10 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) S3::URI s3_uri(Poco::URI(configuration.url)); auto max_single_read_retries = args.getLocalContext()->getSettingsRef().s3_max_single_read_retries; auto min_upload_part_size = args.getLocalContext()->getSettingsRef().s3_min_upload_part_size; + auto upload_part_size_multiply_factor = args.getLocalContext()->getSettingsRef().s3_upload_part_size_multiply_factor; + auto upload_part_size_multiply_parts_count_threshold = args.getLocalContext()->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold; auto max_single_part_upload_size = args.getLocalContext()->getSettingsRef().s3_max_single_part_upload_size; + auto max_connections = args.getLocalContext()->getSettingsRef().s3_max_connections; ASTPtr partition_by; @@ -938,6 +964,8 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) configuration.format, max_single_read_retries, min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size, max_connections, args.columns, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 427b3af285b..03b54706b4a 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -126,6 +126,8 @@ public: const String & format_name_, UInt64 max_single_read_retries_, UInt64 min_upload_part_size_, + UInt64 upload_part_size_multiply_factor_, + UInt64 upload_part_size_multiply_parts_count_threshold_, UInt64 max_single_part_upload_size_, UInt64 max_connections_, const ColumnsDescription & columns_, @@ -193,6 +195,8 @@ private: String format_name; UInt64 max_single_read_retries; size_t min_upload_part_size; + size_t upload_part_size_multiply_factor; + size_t upload_part_size_multiply_parts_count_threshold; size_t 
max_single_part_upload_size; String compression_method; String name; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 762eb079c1c..57220c68347 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -82,7 +82,7 @@ Pipe StorageS3Cluster::read( { StorageS3::updateClientAndAuthSettings(context, client_auth); - auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettings()); + auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); StorageS3::updateClientAndAuthSettings(context, client_auth); auto iterator = std::make_shared(*client_auth.client, client_auth.uri); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index dd2736613b3..508c9d8b157 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -36,6 +36,14 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NETWORK_ERROR; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + + +static bool urlWithGlobs(const String & uri) +{ + return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) + || uri.find('|') != std::string::npos; } @@ -62,6 +70,7 @@ IStorageURLBase::IStorageURLBase( , partition_by(partition_by_) { StorageInMemoryMetadata storage_metadata; + if (columns_.empty()) { auto columns = getTableStructureFromData(format_name, uri, compression_method, headers, format_settings, context_); @@ -69,52 +78,12 @@ IStorageURLBase::IStorageURLBase( } else storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); } -ColumnsDescription IStorageURLBase::getTableStructureFromData( - const String & format, - const String & uri, - const String & compression_method, - const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers, - const std::optional & format_settings, - ContextPtr context) -{ - auto parsed_uri = Poco::URI(uri); - Poco::Net::HTTPBasicCredentials credentials; - std::string user_info = parsed_uri.getUserInfo(); - if (!user_info.empty()) - { - std::size_t n = user_info.find(':'); - if (n != std::string::npos) - { - credentials.setUsername(user_info.substr(0, n)); - credentials.setPassword(user_info.substr(n + 1)); - } - } - - auto read_buffer_creator = [&]() - { - return wrapReadBufferWithCompressionMethod( - std::make_unique( - parsed_uri, - Poco::Net::HTTPRequest::HTTP_GET, - nullptr, - ConnectionTimeouts::getHTTPTimeouts(context), - credentials, - context->getSettingsRef().max_http_get_redirects, - DBMS_DEFAULT_BUFFER_SIZE, - context->getReadSettings(), - headers, - ReadWriteBufferFromHTTP::Range{}, - context->getRemoteHostFilter()), - chooseCompressionMethod(parsed_uri.getPath(), compression_method)); - }; - - return readSchemaFromFormat(format, format_settings, read_buffer_creator, context); -} namespace { @@ -163,6 +132,20 @@ namespace reader->cancel(); } + static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri) + { + const auto & user_info = request_uri.getUserInfo(); + if (!user_info.empty()) + { + std::size_t n = user_info.find(':'); + if (n != std::string::npos) + { + credentials.setUsername(user_info.substr(0, n)); + credentials.setPassword(user_info.substr(n + 1)); + } + } + } + StorageURLSource( URIInfoPtr uri_info_, const std::string & http_method, @@ -177,7 +160,8 @@ namespace const 
ConnectionTimeouts & timeouts, const String & compression_method, const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {}, - const URIParams & params = {}) + const URIParams & params = {}, + bool glob_url = false) : SourceWithProgress(sample_block), name(std::move(name_)) , uri_info(uri_info_) { @@ -186,53 +170,43 @@ namespace /// Lazy initialization. We should not perform requests in constructor, because we need to do it in query pipeline. initialize = [=, this](const URIInfo::FailoverOptions & uri_options) { - WriteBufferFromOwnString error_message; - for (auto option = uri_options.begin(); option < uri_options.end(); ++option) + if (uri_options.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty url list"); + + if (uri_options.size() > 1) { - auto request_uri = Poco::URI(*option); + read_buf = getFirstAvailableURLReadBuffer( + uri_options, context, params, http_method, + callback, timeouts, compression_method, credentials, headers); + } + else + { + ReadSettings read_settings = context->getReadSettings(); + bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs; + auto request_uri = Poco::URI(uri_options[0]); + for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); - try - { - std::string user_info = request_uri.getUserInfo(); - if (!user_info.empty()) - { - std::size_t n = user_info.find(':'); - if (n != std::string::npos) - { - credentials.setUsername(user_info.substr(0, n)); - credentials.setPassword(user_info.substr(n + 1)); - } - } + setCredentials(credentials, request_uri); - /// Get first alive uri. - read_buf = wrapReadBufferWithCompressionMethod( - std::make_unique( - request_uri, - http_method, - callback, - timeouts, - credentials, - context->getSettingsRef().max_http_get_redirects, - DBMS_DEFAULT_BUFFER_SIZE, - context->getReadSettings(), - headers, - ReadWriteBufferFromHTTP::Range{}, - context->getRemoteHostFilter()), - chooseCompressionMethod(request_uri.getPath(), compression_method)); - } - catch (...) - { - if (uri_options.size() == 1) - throw; - - if (option == uri_options.end() - 1) - throw Exception(ErrorCodes::NETWORK_ERROR, "All uri options are unreachable. 
{}", error_message.str()); - - error_message << *option << " error: " << getCurrentExceptionMessage(false) << "\n"; - tryLogCurrentException(__PRETTY_FUNCTION__); - } + read_buf = wrapReadBufferWithCompressionMethod( + std::make_unique( + request_uri, + http_method, + callback, + timeouts, + credentials, + context->getSettingsRef().max_http_get_redirects, + DBMS_DEFAULT_BUFFER_SIZE, + read_settings, + headers, + ReadWriteBufferFromHTTP::Range{}, + context->getRemoteHostFilter(), + /* delay_initiliazation */true, + /* use_external_buffer */false, + /* skip_url_not_found_error */skip_url_not_found_error), + chooseCompressionMethod(request_uri.getPath(), compression_method)); } auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings); @@ -283,6 +257,65 @@ namespace } } + static std::unique_ptr getFirstAvailableURLReadBuffer( + const std::vector & urls, + ContextPtr context, + const URIParams & params, + const String & http_method, + std::function callback, + const ConnectionTimeouts & timeouts, + const String & compression_method, + Poco::Net::HTTPBasicCredentials & credentials, + const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers) + { + String first_exception_message; + ReadSettings read_settings = context->getReadSettings(); + + for (auto option = urls.begin(); option != urls.end(); ++option) + { + bool skip_url_not_found_error = read_settings.http_skip_not_found_url_for_globs && option == std::prev(urls.end()); + auto request_uri = Poco::URI(*option); + + for (const auto & [param, value] : params) + request_uri.addQueryParameter(param, value); + + setCredentials(credentials, request_uri); + + try + { + return wrapReadBufferWithCompressionMethod( + std::make_unique( + request_uri, + http_method, + callback, + timeouts, + credentials, + context->getSettingsRef().max_http_get_redirects, + DBMS_DEFAULT_BUFFER_SIZE, + read_settings, + headers, + ReadWriteBufferFromHTTP::Range{}, + context->getRemoteHostFilter(), + /* delay_initiliazation */false, + /* use_external_buffer */false, + /* skip_url_not_found_error */skip_url_not_found_error), + chooseCompressionMethod(request_uri.getPath(), compression_method)); + } + catch (...) + { + if (first_exception_message.empty()) + first_exception_message = getCurrentExceptionMessage(false); + + if (urls.size() == 1) + throw; + + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + throw Exception(ErrorCodes::NETWORK_ERROR, "All uri ({}) options are unreachable: {}", urls.size(), first_exception_message); + } + private: using InitializeFunc = std::function; InitializeFunc initialize; @@ -297,7 +330,7 @@ namespace /// have R/W access to reader pointer. 
std::mutex reader_mutex; - Poco::Net::HTTPBasicCredentials credentials{}; + Poco::Net::HTTPBasicCredentials credentials; }; } @@ -313,9 +346,10 @@ StorageURLSink::StorageURLSink( : SinkToStorage(sample_block) { std::string content_type = FormatFactory::instance().getContentType(format, context, format_settings); + std::string content_encoding = toContentEncodingName(compression_method); write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(Poco::URI(uri), http_method, content_type, timeouts), + std::make_unique(Poco::URI(uri), http_method, content_type, content_encoding, timeouts), compression_method, 3); writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, {} /* write callback */, format_settings); @@ -407,6 +441,71 @@ std::function IStorageURLBase::getReadPOSTDataCallback( } +ColumnsDescription IStorageURLBase::getTableStructureFromData( + const String & format, + const String & uri, + const String & compression_method, + const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers, + const std::optional & format_settings, + ContextPtr context) +{ + ReadBufferCreator read_buffer_creator; + Poco::Net::HTTPBasicCredentials credentials; + + if (urlWithGlobs(uri)) + { + std::vector urls_to_check; + + size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements; + auto uri_descriptions = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses); + for (const auto & description : uri_descriptions) + { + auto options = parseRemoteDescription(description, 0, description.size(), '|', max_addresses); + urls_to_check.insert(urls_to_check.end(), options.begin(), options.end()); + } + + read_buffer_creator = [&, urls_to_check]() + { + return StorageURLSource::getFirstAvailableURLReadBuffer( + urls_to_check, + context, + {}, + Poco::Net::HTTPRequest::HTTP_GET, + {}, + ConnectionTimeouts::getHTTPTimeouts(context), + compression_method, + credentials, + headers); + }; + } + else + { + read_buffer_creator = [&]() + { + auto parsed_uri = Poco::URI(uri); + StorageURLSource::setCredentials(credentials, parsed_uri); + + return wrapReadBufferWithCompressionMethod( + std::make_unique( + parsed_uri, + Poco::Net::HTTPRequest::HTTP_GET, + nullptr, + ConnectionTimeouts::getHTTPTimeouts(context), + credentials, + context->getSettingsRef().max_http_get_redirects, + DBMS_DEFAULT_BUFFER_SIZE, + context->getReadSettings(), + headers, + ReadWriteBufferFromHTTP::Range{}, + context->getRemoteHostFilter(), + /* delay_initiliazation */true), + chooseCompressionMethod(parsed_uri.getPath(), compression_method)); + }; + } + + return readSchemaFromFormat(format, format_settings, read_buffer_creator, context); +} + Pipe IStorageURLBase::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -417,10 +516,8 @@ Pipe IStorageURLBase::read( unsigned num_streams) { auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); - bool with_globs = (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) - || uri.find('|') != std::string::npos; - if (with_globs) + if (urlWithGlobs(uri)) { size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; auto uri_descriptions = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses); @@ -452,7 +549,7 @@ Pipe IStorageURLBase::read( metadata_snapshot->getColumns(), max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), - compression_method, headers, params)); + 
compression_method, headers, params, /* glob_url */true)); } return Pipe::unitePipes(std::move(pipes)); } diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.h b/src/Storages/System/StorageSystemAsynchronousInserts.h index 79f19ec3d97..d25217006db 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.h +++ b/src/Storages/System/StorageSystemAsynchronousInserts.h @@ -14,7 +14,7 @@ class StorageSystemAsynchronousInserts final : public IStorageSystemOneBlock { public: - std::string getName() const override { return "AsynchronousInserts"; } + std::string getName() const override { return "SystemAsynchronousInserts"; } static NamesAndTypesList getNamesAndTypes(); protected: diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 6c8159ca720..f4dd9cbd45d 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -66,7 +66,7 @@ StoragesInfo::getParts(MergeTreeData::DataPartStateVector & state, bool has_stat } StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) - : query_id(context->getCurrentQueryId()), settings(context->getSettings()) + : query_id(context->getCurrentQueryId()), settings(context->getSettingsRef()) { /// Will apply WHERE to subset of columns and then add more columns. /// This is kind of complicated, but we use WHERE to do less work. diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index d61baf2eb63..f567bf6eefc 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -132,11 +132,11 @@ void registerStorages() registerStorageKafka(factory); #endif -#if USE_FILELOG + #if USE_FILELOG registerStorageFileLog(factory); -#endif + #endif -#if USE_AMQPCPP + #if USE_AMQPCPP registerStorageRabbitMQ(factory); #endif diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 85857011616..90fbb079bb6 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -205,7 +205,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr if (name != "clusterAllReplicas") cluster = context->getCluster(cluster_name_expanded); else - cluster = context->getCluster(cluster_name_expanded)->getClusterWithReplicasAsShards(context->getSettings()); + cluster = context->getCluster(cluster_name_expanded)->getClusterWithReplicasAsShards(context->getSettingsRef()); } else { @@ -241,7 +241,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr bool treat_local_as_remote = false; bool treat_local_port_as_remote = context->getApplicationType() == Context::ApplicationType::LOCAL; cluster = std::make_shared( - context->getSettings(), + context->getSettingsRef(), names, configuration.username, configuration.password, diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index e1e31b5efc3..f91ce36c3c4 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -148,6 +148,8 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context S3::URI s3_uri (uri); UInt64 max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries; UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size; + UInt64 upload_part_size_multiply_factor = context->getSettingsRef().s3_upload_part_size_multiply_factor; 
+ UInt64 upload_part_size_multiply_parts_count_threshold = context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold; UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size; UInt64 max_connections = context->getSettingsRef().s3_max_connections; @@ -163,6 +165,8 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context s3_configuration->format, max_single_read_retries, min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size, max_connections, getActualTableStructure(context), diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index aa3ae20b61d..bc215b578b9 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -109,6 +109,8 @@ StoragePtr TableFunctionS3Cluster::executeImpl( /// Actually this parameters are not used UInt64 max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries; UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size; + UInt64 upload_part_size_multiply_factor = context->getSettingsRef().s3_upload_part_size_multiply_factor; + UInt64 upload_part_size_multiply_parts_count_threshold = context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold; UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size; UInt64 max_connections = context->getSettingsRef().s3_max_connections; storage = StorageS3::create( @@ -119,6 +121,8 @@ StoragePtr TableFunctionS3Cluster::executeImpl( format, max_single_read_retries, min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size, max_connections, getActualTableStructure(context), diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 042e0e90459..319a6fc3fa5 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -98,6 +98,7 @@ if __name__ == "__main__": 'server.log': os.path.join(workspace_path, 'server.log'), 'fuzzer.log': os.path.join(workspace_path, 'fuzzer.log'), 'report.html': os.path.join(workspace_path, 'report.html'), + 'core.gz': os.path.join(workspace_path, 'core.gz'), } s3_helper = S3Helper('https://s3.amazonaws.com') diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index a908f5fe11c..140ede3067f 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -3,10 +3,11 @@ import argparse import json import logging import os +import platform import shutil import subprocess import time -from typing import List, Optional, Set, Tuple, Union +from typing import Dict, List, Optional, Set, Tuple, Union from github import Github @@ -23,24 +24,32 @@ NAME = "Push to Dockerhub (actions)" TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check") +ImagesDict = Dict[str, dict] + class DockerImage: def __init__( self, path: str, repo: str, + only_amd64: bool, parent: Optional["DockerImage"] = None, gh_repo_path: str = GITHUB_WORKSPACE, ): self.path = path self.full_path = os.path.join(gh_repo_path, path) self.repo = repo + self.only_amd64 = only_amd64 self.parent = parent self.built = False def __eq__(self, other) -> bool: # type: ignore """Is used to check if DockerImage is in a set or not""" - return self.path == other.path and self.repo == self.repo + return ( + self.path == other.path + and 
self.repo == self.repo + and self.only_amd64 == other.only_amd64 + ) def __lt__(self, other) -> bool: if not isinstance(other, DockerImage): @@ -65,9 +74,8 @@ class DockerImage: return f"DockerImage(path={self.path},repo={self.repo},parent={self.parent})" -def get_changed_docker_images( - pr_info: PRInfo, repo_path: str, image_file_path: str -) -> Set[DockerImage]: +def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict: + """Return images suppose to build on the current architecture host""" images_dict = {} path_to_images_file = os.path.join(repo_path, image_file_path) if os.path.exists(path_to_images_file): @@ -78,6 +86,13 @@ def get_changed_docker_images( "Image file %s doesnt exists in repo %s", image_file_path, repo_path ) + return images_dict + + +def get_changed_docker_images( + pr_info: PRInfo, images_dict: ImagesDict +) -> Set[DockerImage]: + if not images_dict: return set() @@ -96,6 +111,7 @@ def get_changed_docker_images( for f in files_changed: if f.startswith(dockerfile_dir): name = image_description["name"] + only_amd64 = image_description.get("only_amd64", False) logging.info( "Found changed file '%s' which affects " "docker image '%s' with path '%s'", @@ -103,7 +119,7 @@ def get_changed_docker_images( name, dockerfile_dir, ) - changed_images.append(DockerImage(dockerfile_dir, name)) + changed_images.append(DockerImage(dockerfile_dir, name, only_amd64)) break # The order is important: dependents should go later than bases, so that @@ -118,9 +134,9 @@ def get_changed_docker_images( dependent, image, ) - changed_images.append( - DockerImage(dependent, images_dict[dependent]["name"], image) - ) + name = images_dict[dependent]["name"] + only_amd64 = images_dict[dependent].get("only_amd64", False) + changed_images.append(DockerImage(dependent, name, only_amd64, image)) index += 1 if index > 5 * len(images_dict): # Sanity check to prevent infinite loop. @@ -161,12 +177,43 @@ def gen_versions( return versions, result_version +def build_and_push_dummy_image( + image: DockerImage, + version_string: str, + push: bool, +) -> Tuple[bool, str]: + dummy_source = "ubuntu:20.04" + logging.info("Building docker image %s as %s", image.repo, dummy_source) + build_log = os.path.join( + TEMP_PATH, f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}" + ) + with open(build_log, "wb") as bl: + cmd = ( + f"docker pull {dummy_source}; " + f"docker tag {dummy_source} {image.repo}:{version_string}; " + ) + if push: + cmd += f"docker push {image.repo}:{version_string}" + + logging.info("Docker command to run: %s", cmd) + with subprocess.Popen(cmd, shell=True, stderr=bl, stdout=bl) as proc: + retcode = proc.wait() + + if retcode != 0: + return False, build_log + + logging.info("Processing of %s successfully finished", image.repo) + return True, build_log + + def build_and_push_one_image( image: DockerImage, version_string: str, push: bool, child: bool, ) -> Tuple[bool, str]: + if image.only_amd64 and platform.machine() not in ["amd64", "x86_64"]: + return build_and_push_dummy_image(image, version_string, push) logging.info( "Building docker image %s with version %s from path %s", image.repo, @@ -290,10 +337,15 @@ def parse_args() -> argparse.Namespace: default="clickhouse", help="docker hub repository prefix", ) + parser.add_argument( + "--all", + action="store_true", + help="rebuild all images", + ) parser.add_argument( "--image-path", type=str, - action="append", + nargs="*", help="list of image paths to build instead of using pr_info + diff URL, " "e.g. 
'docker/packager/binary'", ) @@ -336,15 +388,18 @@ def main(): shutil.rmtree(TEMP_PATH) os.makedirs(TEMP_PATH) - if args.image_path: + images_dict = get_images_dict(GITHUB_WORKSPACE, "docker/images.json") + + if args.all: + pr_info = PRInfo() + pr_info.changed_files = set(images_dict.keys()) + elif args.image_path: pr_info = PRInfo() pr_info.changed_files = set(i for i in args.image_path) else: pr_info = PRInfo(need_changed_files=True) - changed_images = get_changed_docker_images( - pr_info, GITHUB_WORKSPACE, "docker/images.json" - ) + changed_images = get_changed_docker_images(pr_info, images_dict) logging.info("Has changed images %s", ", ".join([im.path for im in changed_images])) image_versions, result_version = gen_versions(pr_info, args.suffix) diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index c6814b911ff..82d012bfe1a 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -57,7 +57,7 @@ def parse_args() -> argparse.Namespace: args = parser.parse_args() if len(args.suffixes) < 2: - raise parser.error("more than two --suffix should be given") + parser.error("more than two --suffix should be given") return args @@ -81,6 +81,7 @@ def strip_suffix(suffix: str, images: Images) -> Images: def check_sources(to_merge: Dict[str, Images]) -> Images: + """get a dict {arch1: Images, arch2: Images}""" result = {} # type: Images first_suffix = "" for suffix, images in to_merge.items(): diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 4392641b215..27bfe07db53 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -23,54 +23,69 @@ class TestDockerImageCheck(unittest.TestCase): "docker/docs/builder", } images = sorted( - list(di.get_changed_docker_images(pr_info, "/", self.docker_images_path)) + list( + di.get_changed_docker_images( + pr_info, di.get_images_dict("/", self.docker_images_path) + ) + ) ) self.maxDiff = None expected = sorted( [ - di.DockerImage("docker/test/base", "clickhouse/test-base"), - di.DockerImage("docker/docs/builder", "clickhouse/docs-builder"), + di.DockerImage("docker/test/base", "clickhouse/test-base", False), + di.DockerImage("docker/docs/builder", "clickhouse/docs-builder", True), di.DockerImage( "docker/test/stateless", "clickhouse/stateless-test", + False, "clickhouse/test-base", ), di.DockerImage( "docker/test/integration/base", "clickhouse/integration-test", + False, "clickhouse/test-base", ), di.DockerImage( - "docker/test/fuzzer", "clickhouse/fuzzer", "clickhouse/test-base" + "docker/test/fuzzer", + "clickhouse/fuzzer", + False, + "clickhouse/test-base", ), di.DockerImage( "docker/test/keeper-jepsen", "clickhouse/keeper-jepsen-test", + False, "clickhouse/test-base", ), di.DockerImage( "docker/docs/check", "clickhouse/docs-check", + False, "clickhouse/docs-builder", ), di.DockerImage( "docker/docs/release", "clickhouse/docs-release", + False, "clickhouse/docs-builder", ), di.DockerImage( "docker/test/stateful", "clickhouse/stateful-test", + False, "clickhouse/stateless-test", ), di.DockerImage( "docker/test/unit", "clickhouse/unit-test", + False, "clickhouse/stateless-test", ), di.DockerImage( "docker/test/stress", "clickhouse/stress-test", + False, "clickhouse/stateful-test", ), ] @@ -92,13 +107,15 @@ class TestDockerImageCheck(unittest.TestCase): @patch("builtins.open") @patch("subprocess.Popen") - def test_build_and_push_one_image(self, mock_popen, mock_open): + @patch("platform.machine") + def test_build_and_push_one_image(self, mock_machine, mock_popen, 
mock_open): mock_popen.return_value.__enter__.return_value.wait.return_value = 0 - image = di.DockerImage("path", "name", gh_repo_path="") + image = di.DockerImage("path", "name", False, gh_repo_path="") result, _ = di.build_and_push_one_image(image, "version", True, True) mock_open.assert_called_once() mock_popen.assert_called_once() + mock_machine.assert_not_called() self.assertIn( "docker buildx build --builder default --build-arg FROM_TAG=version " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version --cache-from " @@ -106,11 +123,15 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.call_args.args, ) self.assertTrue(result) + mock_open.reset_mock() + mock_popen.reset_mock() + mock_machine.reset_mock() - mock_open.reset() - mock_popen.reset() mock_popen.return_value.__enter__.return_value.wait.return_value = 0 result, _ = di.build_and_push_one_image(image, "version2", False, True) + mock_open.assert_called_once() + mock_popen.assert_called_once() + mock_machine.assert_not_called() self.assertIn( "docker buildx build --builder default --build-arg FROM_TAG=version2 " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " @@ -119,8 +140,14 @@ class TestDockerImageCheck(unittest.TestCase): ) self.assertTrue(result) + mock_open.reset_mock() + mock_popen.reset_mock() + mock_machine.reset_mock() mock_popen.return_value.__enter__.return_value.wait.return_value = 1 result, _ = di.build_and_push_one_image(image, "version2", False, False) + mock_open.assert_called_once() + mock_popen.assert_called_once() + mock_machine.assert_not_called() self.assertIn( "docker buildx build --builder default " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " @@ -129,13 +156,37 @@ class TestDockerImageCheck(unittest.TestCase): ) self.assertFalse(result) + mock_open.reset_mock() + mock_popen.reset_mock() + mock_machine.reset_mock() + only_amd64_image = di.DockerImage("path", "name", True) + mock_popen.return_value.__enter__.return_value.wait.return_value = 0 + + result, _ = di.build_and_push_one_image(only_amd64_image, "version", True, True) + mock_open.assert_called_once() + mock_popen.assert_called_once() + mock_machine.assert_called_once() + self.assertIn( + "docker pull ubuntu:20.04; docker tag ubuntu:20.04 name:version; " + "docker push name:version", + mock_popen.call_args.args, + ) + self.assertTrue(result) + result, _ = di.build_and_push_one_image( + only_amd64_image, "version", False, True + ) + self.assertIn( + "docker pull ubuntu:20.04; docker tag ubuntu:20.04 name:version; ", + mock_popen.call_args.args, + ) + @patch("docker_images_check.build_and_push_one_image") def test_process_image_with_parents(self, mock_build): mock_build.side_effect = lambda w, x, y, z: (True, f"{w.repo}_{x}.log") - im1 = di.DockerImage("path1", "repo1") - im2 = di.DockerImage("path2", "repo2", im1) - im3 = di.DockerImage("path3", "repo3", im2) - im4 = di.DockerImage("path4", "repo4", im1) + im1 = di.DockerImage("path1", "repo1", False) + im2 = di.DockerImage("path2", "repo2", False, im1) + im3 = di.DockerImage("path3", "repo3", False, im2) + im4 = di.DockerImage("path4", "repo4", False, im1) # We use list to have determined order of image builgings images = [im4, im1, im3, im2, im1] results = [ diff --git a/tests/ci/tests/docker_images.json b/tests/ci/tests/docker_images.json index 354bdaa8728..ca5c516bccb 100644 --- a/tests/ci/tests/docker_images.json +++ b/tests/ci/tests/docker_images.json @@ -150,6 +150,7 @@ }, "docker/docs/builder": { "name": "clickhouse/docs-builder", + 
"only_amd64": true, "dependent": [ "docker/docs/check", "docker/docs/release" diff --git a/tests/integration/test_distributed_insert_backward_compatibility/__init__.py b/tests/integration/test_distributed_insert_backward_compatibility/__init__.py new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/tests/integration/test_distributed_insert_backward_compatibility/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/integration/test_distributed_insert_backward_compatibility/configs/remote_servers.xml b/tests/integration/test_distributed_insert_backward_compatibility/configs/remote_servers.xml new file mode 100644 index 00000000000..9c7f02c190f --- /dev/null +++ b/tests/integration/test_distributed_insert_backward_compatibility/configs/remote_servers.xml @@ -0,0 +1,12 @@ + + + + + + node1 + 9000 + + + + + diff --git a/tests/integration/test_distributed_insert_backward_compatibility/test.py b/tests/integration/test_distributed_insert_backward_compatibility/test.py new file mode 100644 index 00000000000..ba7d8e0a25d --- /dev/null +++ b/tests/integration/test_distributed_insert_backward_compatibility/test.py @@ -0,0 +1,39 @@ +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) + +node_shard = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml']) + +node_dist = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], image='yandex/clickhouse-server', + tag='21.11.9.1', stay_alive=True, with_installed_binary=True) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node_shard.query("CREATE TABLE local_table(id UInt32, val String) ENGINE = MergeTree ORDER BY id") + node_dist.query("CREATE TABLE local_table(id UInt32, val String) ENGINE = MergeTree ORDER BY id") + node_dist.query("CREATE TABLE dist_table(id UInt32, val String) ENGINE = Distributed(test_cluster, default, local_table, rand())") + + yield cluster + + finally: + cluster.shutdown() + + +def test_distributed_in_tuple(started_cluster): + node_dist.query("SYSTEM STOP DISTRIBUTED SENDS dist_table") + + node_dist.query("INSERT INTO dist_table VALUES (1, 'foo')") + assert node_dist.query("SELECT count() FROM dist_table") == "0\n" + assert node_shard.query("SELECT count() FROM local_table") == "0\n" + + node_dist.restart_with_latest_version(signal=9) + node_dist.query("SYSTEM FLUSH DISTRIBUTED dist_table") + + assert node_dist.query("SELECT count() FROM dist_table") == "1\n" + assert node_shard.query("SELECT count() FROM local_table") == "1\n" diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 89e07fecb0a..196141f9bfe 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -150,8 +150,11 @@ def test_grant_all_on_table(): instance.query("CREATE USER A, B") instance.query("GRANT ALL ON test.table TO A WITH GRANT OPTION") instance.query("GRANT ALL ON test.table TO B", user='A') - assert instance.query( - "SHOW GRANTS FOR B") == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP TABLE, DROP VIEW, DROP DICTIONARY, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH 
DISTRIBUTED, dictGet ON test.table TO B\n" + assert instance.query("SHOW GRANTS FOR B") ==\ + "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, "\ + "DROP TABLE, DROP VIEW, DROP DICTIONARY, TRUNCATE, OPTIMIZE, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, "\ + "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, "\ + "SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.table TO B\n" instance.query("REVOKE ALL ON test.table FROM B", user='A') assert instance.query("SHOW GRANTS FOR B") == "" diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index b6968575883..bd9a0cbe438 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -2,6 +2,8 @@ import os import pytest import sys import time +import pytz +import uuid import grpc from helpers.cluster import ClickHouseCluster, run_and_check from threading import Thread @@ -43,8 +45,8 @@ def create_channel(): main_channel = channel return channel -def query_common(query_text, settings={}, input_data=[], input_data_delimiter='', output_format='TabSeparated', external_tables=[], - user_name='', password='', query_id='123', session_id='', stream_output=False, channel=None): +def query_common(query_text, settings={}, input_data=[], input_data_delimiter='', output_format='TabSeparated', send_output_columns=False, + external_tables=[], user_name='', password='', query_id='123', session_id='', stream_output=False, channel=None): if type(input_data) is not list: input_data = [input_data] if type(input_data_delimiter) is str: @@ -58,7 +60,8 @@ def query_common(query_text, settings={}, input_data=[], input_data_delimiter='' input_data_part = input_data_part.encode(DEFAULT_ENCODING) return clickhouse_grpc_pb2.QueryInfo(query=query_text, settings=settings, input_data=input_data_part, input_data_delimiter=input_data_delimiter, output_format=output_format, - external_tables=external_tables, user_name=user_name, password=password, query_id=query_id, + send_output_columns=send_output_columns, external_tables=external_tables, + user_name=user_name, password=password, query_id=query_id, session_id=session_id, next_query_info=bool(input_data)) def send_query_info(): yield query_info() @@ -204,6 +207,28 @@ def test_totals_and_extremes(): assert query("SELECT x, y FROM t") == "1\t2\n2\t4\n3\t2\n3\t3\n3\t4\n" assert query_and_get_extremes("SELECT x, y FROM t", settings={"extremes": "1"}) == "1\t2\n3\t4\n" +def test_get_query_details(): + result = list(query_no_errors("CREATE TABLE t (a UInt8) ENGINE = Memory", query_id = '123'))[0] + assert result.query_id == '123' + pytz.timezone(result.time_zone) + assert result.output_format == '' + assert len(result.output_columns) == 0 + assert result.output == b'' + # + result = list(query_no_errors("SELECT 'a', 1", query_id = '', output_format = 'TabSeparated'))[0] + uuid.UUID(result.query_id) + pytz.timezone(result.time_zone) + assert result.output_format == 'TabSeparated' + assert len(result.output_columns) == 0 + assert result.output == b'a\t1\n' + # + result = list(query_no_errors("SELECT 'a' AS x, 1 FORMAT JSONEachRow", query_id = '', send_output_columns=True))[0] + uuid.UUID(result.query_id) + pytz.timezone(result.time_zone) + assert result.output_format == 'JSONEachRow' + 
assert ([(col.name, col.type) for col in result.output_columns]) == [('x', 'String'), ('1', 'UInt8')] + assert result.output == b'{"x":"a","1":1}\n' + def test_errors_handling(): e = query_and_get_error("") #print(e) @@ -225,6 +250,9 @@ def test_logs(): def test_progress(): results = query_no_errors("SELECT number, sleep(0.31) FROM numbers(8) SETTINGS max_block_size=2, interactive_delay=100000", stream_output=True) + for result in results: + result.time_zone = '' + result.query_id = '' #print(results) assert str(results) ==\ """[progress { @@ -232,6 +260,7 @@ def test_progress(): read_bytes: 16 total_rows_to_read: 8 } +output_format: "TabSeparated" , output: "0\\t0\\n1\\t0\\n" , progress { read_rows: 2 @@ -373,22 +402,14 @@ def test_cancel_while_generating_output(): output += result.output assert output == b'0\t0\n1\t0\n2\t0\n3\t0\n' -def test_result_compression(): - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000000)", - result_compression=clickhouse_grpc_pb2.Compression(algorithm=clickhouse_grpc_pb2.CompressionAlgorithm.GZIP, - level=clickhouse_grpc_pb2.CompressionLevel.COMPRESSION_HIGH)) - stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) - result = stub.ExecuteQuery(query_info) - assert result.output == (b'0\n')*1000000 - def test_compressed_output(): - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", compression_type="lz4") + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", output_compression_type="lz4") stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) result = stub.ExecuteQuery(query_info) assert lz4.frame.decompress(result.output) == (b'0\n')*1000 def test_compressed_output_streaming(): - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(100000)", compression_type="lz4") + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(100000)", output_compression_type="lz4") stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) d_context = lz4.frame.create_decompression_context() data = b'' @@ -398,7 +419,7 @@ def test_compressed_output_streaming(): assert data == (b'0\n')*100000 def test_compressed_output_gzip(): - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", compression_type="gzip", compression_level=6) + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", output_compression_type="gzip", output_compression_level=6) stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) result = stub.ExecuteQuery(query_info) assert gzip.decompress(result.output) == (b'0\n')*1000 @@ -407,10 +428,10 @@ def test_compressed_totals_and_extremes(): query("CREATE TABLE t (x UInt8, y UInt8) ENGINE = Memory") query("INSERT INTO t VALUES (1, 2), (2, 4), (3, 2), (3, 3), (3, 4)") stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT sum(x), y FROM t GROUP BY y WITH TOTALS", compression_type="lz4") + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT sum(x), y FROM t GROUP BY y WITH TOTALS", output_compression_type="lz4") result = stub.ExecuteQuery(query_info) assert lz4.frame.decompress(result.totals) == b'12\t0\n' - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT x, y FROM t", settings={"extremes": "1"}, compression_type="lz4") + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT x, y FROM t", settings={"extremes": "1"}, output_compression_type="lz4") result = stub.ExecuteQuery(query_info) assert 
lz4.frame.decompress(result.extremes) == b'1\t2\n3\t4\n' @@ -423,7 +444,7 @@ def test_compressed_insert_query_streaming(): d2 = data[sz1:sz1+sz2] d3 = data[sz1+sz2:] def send_query_info(): - yield clickhouse_grpc_pb2.QueryInfo(query="INSERT INTO t VALUES", input_data=d1, compression_type="lz4", next_query_info=True) + yield clickhouse_grpc_pb2.QueryInfo(query="INSERT INTO t VALUES", input_data=d1, input_compression_type="lz4", next_query_info=True) yield clickhouse_grpc_pb2.QueryInfo(input_data=d2, next_query_info=True) yield clickhouse_grpc_pb2.QueryInfo(input_data=d3) stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) @@ -445,6 +466,12 @@ def test_compressed_external_table(): b"4\tDaniel\n"\ b"5\tEthan\n" +def test_transport_compression(): + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000000)", transport_compression_type='gzip', transport_compression_level=3) + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + result = stub.ExecuteQuery(query_info) + assert result.output == (b'0\n')*1000000 + def test_opentelemetry_context_propagation(): trace_id = "80c190b5-9dc1-4eae-82b9-6c261438c817" parent_span_id = 123 diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 66a35bea06b..0a7f6958b4a 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -389,6 +389,52 @@ def test_dcl_management(): assert node.query("SHOW POLICIES") == "" +def test_grant_create_row_policy(): + copy_policy_xml('no_filters.xml') + assert node.query("SHOW POLICIES") == "" + node.query("CREATE USER X") + + expected_error = "necessary to have grant CREATE ROW POLICY ON mydb.filtered_table1" + assert expected_error in node.query_and_get_error("CREATE POLICY pA ON mydb.filtered_table1 FOR SELECT USING a settings = 5; -} - -enum CompressionAlgorithm { - NO_COMPRESSION = 0; - DEFLATE = 1; - GZIP = 2; - STREAM_GZIP = 3; -} - -enum CompressionLevel { - COMPRESSION_NONE = 0; - COMPRESSION_LOW = 1; - COMPRESSION_MEDIUM = 2; - COMPRESSION_HIGH = 3; -} - -message Compression { - CompressionAlgorithm algorithm = 1; - CompressionLevel level = 2; -} - -// Information about a query which a client sends to a ClickHouse server. -// The first QueryInfo can set any of the following fields. Extra QueryInfos only add extra data. -// In extra QueryInfos only `input_data`, `external_tables`, `next_query_info` and `cancel` fields can be set. -message QueryInfo { - string query = 1; - string query_id = 2; - map settings = 3; - - // Default database. - string database = 4; - - // Input data, used both as data for INSERT query and as data for the input() function. - bytes input_data = 5; - - // Delimiter for input_data, inserted between input_data from adjacent QueryInfos. - bytes input_data_delimiter = 6; - - // Default output format. If not specified, 'TabSeparated' is used. - string output_format = 7; - - repeated ExternalTable external_tables = 8; - - string user_name = 9; - string password = 10; - string quota = 11; - - // Works exactly like sessions in the HTTP protocol. - string session_id = 12; - bool session_check = 13; - uint32 session_timeout = 14; - - // Set `cancel` to true to stop executing the query. - bool cancel = 15; - - // If true there will be at least one more QueryInfo in the input stream. - // `next_query_info` is allowed to be set only if a method with streaming input (i.e. ExecuteQueryWithStreamInput() or ExecuteQueryWithStreamIO()) is used. 
- bool next_query_info = 16; - - /// Controls how a ClickHouse server will compress query execution results before sending back to the client. - /// If not set the compression settings from the configuration file will be used. - Compression result_compression = 17; -} - -enum LogsLevel { - LOG_NONE = 0; - LOG_FATAL = 1; - LOG_CRITICAL = 2; - LOG_ERROR = 3; - LOG_WARNING = 4; - LOG_NOTICE = 5; - LOG_INFORMATION = 6; - LOG_DEBUG = 7; - LOG_TRACE = 8; -} - -message LogEntry { - uint32 time = 1; - uint32 time_microseconds = 2; - uint64 thread_id = 3; - string query_id = 4; - LogsLevel level = 5; - string source = 6; - string text = 7; -} - -message Progress { - uint64 read_rows = 1; - uint64 read_bytes = 2; - uint64 total_rows_to_read = 3; - uint64 written_rows = 4; - uint64 written_bytes = 5; -} - -message Stats { - uint64 rows = 1; - uint64 blocks = 2; - uint64 allocated_bytes = 3; - bool applied_limit = 4; - uint64 rows_before_limit = 5; -} - -message Exception { - int32 code = 1; - string name = 2; - string display_text = 3; - string stack_trace = 4; -} - -// Result of execution of a query which is sent back by the ClickHouse server to the client. -message Result { - // Output of the query, represented in the `output_format` or in a format specified in `query`. - bytes output = 1; - bytes totals = 2; - bytes extremes = 3; - - repeated LogEntry logs = 4; - Progress progress = 5; - Stats stats = 6; - - // Set by the ClickHouse server if there was an exception thrown while executing. - Exception exception = 7; - - // Set by the ClickHouse server if executing was cancelled by the `cancel` field in QueryInfo. - bool cancelled = 8; -} - -service ClickHouse { - rpc ExecuteQuery(QueryInfo) returns (Result) {} - rpc ExecuteQueryWithStreamInput(stream QueryInfo) returns (Result) {} - rpc ExecuteQueryWithStreamOutput(QueryInfo) returns (stream Result) {} - rpc ExecuteQueryWithStreamIO(stream QueryInfo) returns (stream Result) {} -} diff --git a/tests/integration/test_server_reload/protos/clickhouse_grpc.proto b/tests/integration/test_server_reload/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_server_reload/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index b6ac121cd0c..87337a6b459 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -13,11 +13,19 @@ node2 = cluster.add_instance('node2', main_configs=['configs/named_collections.x def started_cluster(): try: cluster.start() + node1.query("CREATE DATABASE test") yield cluster finally: cluster.shutdown() +@pytest.fixture(autouse=True) +def setup_teardown(): + print("PostgreSQL is available - running test") + yield # run test + node1.query("DROP DATABASE test") + node1.query("CREATE DATABASE test") + def test_postgres_select_insert(started_cluster): cursor = started_cluster.postgres_conn.cursor() table_name = 'test_many' @@ -143,11 +151,11 @@ def test_non_default_scema(started_cluster): cursor.execute('INSERT INTO test_schema.test_table SELECT i FROM generate_series(0, 99) as t(i)') node1.query(''' - CREATE TABLE test_pg_table_schema (a UInt32) + CREATE TABLE test.test_pg_table_schema (a UInt32) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword', 'test_schema'); ''') - 
result = node1.query('SELECT * FROM test_pg_table_schema') + result = node1.query('SELECT * FROM test.test_pg_table_schema') expected = node1.query('SELECT number FROM numbers(100)') assert(result == expected) @@ -160,10 +168,10 @@ def test_non_default_scema(started_cluster): cursor.execute('INSERT INTO "test.nice.schema"."test.nice.table" SELECT i FROM generate_series(0, 99) as t(i)') node1.query(''' - CREATE TABLE test_pg_table_schema_with_dots (a UInt32) + CREATE TABLE test.test_pg_table_schema_with_dots (a UInt32) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test.nice.table', 'postgres', 'mysecretpassword', 'test.nice.schema'); ''') - result = node1.query('SELECT * FROM test_pg_table_schema_with_dots') + result = node1.query('SELECT * FROM test.test_pg_table_schema_with_dots') assert(result == expected) cursor.execute('INSERT INTO "test_schema"."test_table" SELECT i FROM generate_series(100, 199) as t(i)') @@ -173,8 +181,8 @@ def test_non_default_scema(started_cluster): cursor.execute('DROP SCHEMA test_schema CASCADE') cursor.execute('DROP SCHEMA "test.nice.schema" CASCADE') - node1.query('DROP TABLE test_pg_table_schema') - node1.query('DROP TABLE test_pg_table_schema_with_dots') + node1.query('DROP TABLE test.test_pg_table_schema') + node1.query('DROP TABLE test.test_pg_table_schema_with_dots') def test_concurrent_queries(started_cluster): @@ -302,19 +310,19 @@ def test_postgres_distributed(started_cluster): def test_datetime_with_timezone(started_cluster): cursor = started_cluster.postgres_conn.cursor() cursor.execute("DROP TABLE IF EXISTS test_timezone") - node1.query("DROP TABLE IF EXISTS test_timezone") + node1.query("DROP TABLE IF EXISTS test.test_timezone") cursor.execute("CREATE TABLE test_timezone (ts timestamp without time zone, ts_z timestamp with time zone)") cursor.execute("insert into test_timezone select '2014-04-04 20:00:00', '2014-04-04 20:00:00'::timestamptz at time zone 'America/New_York';") cursor.execute("select * from test_timezone") result = cursor.fetchall()[0] logging.debug(f'{result[0]}, {str(result[1])[:-6]}') - node1.query("create table test_timezone ( ts DateTime, ts_z DateTime('America/New_York')) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_timezone', 'postgres', 'mysecretpassword');") - assert(node1.query("select ts from test_timezone").strip() == str(result[0])) + node1.query("create table test.test_timezone ( ts DateTime, ts_z DateTime('America/New_York')) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_timezone', 'postgres', 'mysecretpassword');") + assert(node1.query("select ts from test.test_timezone").strip() == str(result[0])) # [:-6] because 2014-04-04 16:00:00+00:00 -> 2014-04-04 16:00:00 - assert(node1.query("select ts_z from test_timezone").strip() == str(result[1])[:-6]) - assert(node1.query("select * from test_timezone") == "2014-04-04 20:00:00\t2014-04-04 16:00:00\n") + assert(node1.query("select ts_z from test.test_timezone").strip() == str(result[1])[:-6]) + assert(node1.query("select * from test.test_timezone") == "2014-04-04 20:00:00\t2014-04-04 16:00:00\n") cursor.execute("DROP TABLE test_timezone") - node1.query("DROP TABLE test_timezone") + node1.query("DROP TABLE test.test_timezone") def test_postgres_ndim(started_cluster): @@ -342,20 +350,20 @@ def test_postgres_on_conflict(started_cluster): cursor.execute(f'CREATE TABLE {table} (a integer PRIMARY KEY, b text, c integer)') node1.query(''' - CREATE TABLE test_conflict (a UInt32, b String, c Int32) + CREATE TABLE test.test_conflict (a UInt32, b String, c Int32) 
ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_conflict', 'postgres', 'mysecretpassword', '', 'ON CONFLICT DO NOTHING'); ''') - node1.query(f''' INSERT INTO {table} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') - node1.query(f''' INSERT INTO {table} SELECT number, concat('name_', toString(number)), 4 from numbers(100)''') + node1.query(f''' INSERT INTO test.{table} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') + node1.query(f''' INSERT INTO test.{table} SELECT number, concat('name_', toString(number)), 4 from numbers(100)''') - check1 = f"SELECT count() FROM {table}" + check1 = f"SELECT count() FROM test.{table}" assert (node1.query(check1)).rstrip() == '100' table_func = f'''postgresql('{started_cluster.postgres_ip}:{started_cluster.postgres_port}', 'postgres', '{table}', 'postgres', 'mysecretpassword', '', 'ON CONFLICT DO NOTHING')''' node1.query(f'''INSERT INTO TABLE FUNCTION {table_func} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') node1.query(f'''INSERT INTO TABLE FUNCTION {table_func} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') - check1 = f"SELECT count() FROM {table}" + check1 = f"SELECT count() FROM test.{table}" assert (node1.query(check1)).rstrip() == '100' cursor.execute(f'DROP TABLE {table} ') @@ -367,48 +375,48 @@ def test_predefined_connection_configuration(started_cluster): cursor.execute(f'CREATE TABLE test_table (a integer PRIMARY KEY, b integer)') node1.query(''' - DROP TABLE IF EXISTS test_table; - CREATE TABLE test_table (a UInt32, b Int32) + DROP TABLE IF EXISTS test.test_table; + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres1); ''') - node1.query(f''' INSERT INTO test_table SELECT number, number from numbers(100)''') - assert (node1.query(f"SELECT count() FROM test_table").rstrip() == '100') + node1.query(f''' INSERT INTO test.test_table SELECT number, number from numbers(100)''') + assert (node1.query(f"SELECT count() FROM test.test_table").rstrip() == '100') node1.query(''' - DROP TABLE test_table; - CREATE TABLE test_table (a UInt32, b Int32) + DROP TABLE test.test_table; + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres1, on_conflict='ON CONFLICT DO NOTHING'); ''') - node1.query(f''' INSERT INTO test_table SELECT number, number from numbers(100)''') - node1.query(f''' INSERT INTO test_table SELECT number, number from numbers(100)''') - assert (node1.query(f"SELECT count() FROM test_table").rstrip() == '100') + node1.query(f''' INSERT INTO test.test_table SELECT number, number from numbers(100)''') + node1.query(f''' INSERT INTO test.test_table SELECT number, number from numbers(100)''') + assert (node1.query(f"SELECT count() FROM test.test_table").rstrip() == '100') - node1.query('DROP TABLE test_table;') + node1.query('DROP TABLE test.test_table;') node1.query_and_get_error(''' - CREATE TABLE test_table (a UInt32, b Int32) + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres1, 'ON CONFLICT DO NOTHING'); ''') node1.query_and_get_error(''' - CREATE TABLE test_table (a UInt32, b Int32) + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres2); ''') node1.query_and_get_error(''' - CREATE TABLE test_table (a UInt32, b Int32) + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(unknown_collection); ''') node1.query(''' - CREATE TABLE test_table (a UInt32, b Int32) + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE 
PostgreSQL(postgres1, port=5432, database='postgres', table='test_table'); ''') - assert (node1.query(f"SELECT count() FROM test_table").rstrip() == '100') + assert (node1.query(f"SELECT count() FROM test.test_table").rstrip() == '100') node1.query(''' - DROP TABLE test_table; - CREATE TABLE test_table (a UInt32, b Int32) + DROP TABLE test.test_table; + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres3, port=5432); ''') - assert (node1.query(f"SELECT count() FROM test_table").rstrip() == '100') + assert (node1.query(f"SELECT count() FROM test.test_table").rstrip() == '100') assert (node1.query(f"SELECT count() FROM postgresql(postgres1)").rstrip() == '100') node1.query("INSERT INTO TABLE FUNCTION postgresql(postgres1, on_conflict='ON CONFLICT DO NOTHING') SELECT number, number from numbers(100)") diff --git a/tests/performance/file_table_function.xml b/tests/performance/file_table_function.xml new file mode 100644 index 00000000000..143f2b5eb4d --- /dev/null +++ b/tests/performance/file_table_function.xml @@ -0,0 +1,54 @@ + + + + + format + + TabSeparated + TabSeparatedWithNames + TabSeparatedWithNamesAndTypes + CSV + CSVWithNames + Values + JSONEachRow + JSONCompactEachRow + JSONCompactEachRowWithNamesAndTypes + TSKV + RowBinary + Native + MsgPack + + + + partitions_count + + 5 + 50 + 500 + + + + + + INSERT INTO FUNCTION file('test_file', '{format}', 'key UInt64, value UInt64') + SELECT number, number FROM numbers(1000000) + + + + INSERT INTO FUNCTION file('test_file', '{format}', 'key UInt64, value1 UInt64, value2 UInt64, value3 UInt64, value4 UInt64, value5 UInt64') + SELECT number, number, number, number, number, number FROM numbers(1000000) + + + + INSERT INTO FUNCTION file('test_file_{{_partition_id}}', '{format}', 'partition_id UInt64, value UInt64') + PARTITION BY partition_id + SELECT (number % {partitions_count}) as partition_id, number FROM numbers(1000000) + + + + INSERT INTO FUNCTION file('test_file_{{_partition_id}}', '{format}', 'partition_id UInt64, value1 UInt64, value2 UInt64, value3 UInt64, value4 UInt64, value5 UInt64') + PARTITION BY partition_id + SELECT (number % {partitions_count}) as partition_id, number, number, number, number, number FROM numbers(1000000) + + + diff --git a/tests/performance/select_format.xml b/tests/performance/select_format.xml index f0114d7a517..982039102d0 100644 --- a/tests/performance/select_format.xml +++ b/tests/performance/select_format.xml @@ -49,8 +49,8 @@ CREATE TABLE IF NOT EXISTS table_{format_slow} ENGINE = File({format_slow}, '/dev/null') AS test.hits CREATE TABLE IF NOT EXISTS table_{format_fast} ENGINE = File({format_fast}, '/dev/null') AS test.hits - INSERT INTO table_{format_slow} SELECT * FROM test.hits LIMIT 10000 SETTINGS engine_file_truncate_on_insert = 1 - INSERT INTO table_{format_fast} SELECT * FROM test.hits LIMIT 100000 SETTINGS engine_file_truncate_on_insert = 1 + INSERT INTO table_{format_slow} SELECT * FROM test.hits LIMIT 10000 + INSERT INTO table_{format_fast} SELECT * FROM test.hits LIMIT 100000 DROP TABLE IF EXISTS table_{format_slow} DROP TABLE IF EXISTS table_{format_fast} diff --git a/tests/performance/writing_valid_utf8.xml b/tests/performance/writing_valid_utf8.xml index 4f6637f7566..d5343144db0 100644 --- a/tests/performance/writing_valid_utf8.xml +++ b/tests/performance/writing_valid_utf8.xml @@ -20,7 +20,7 @@ CREATE TABLE IF NOT EXISTS table_{format} ENGINE = File({format}, '/dev/null') AS SELECT SearchPhrase, ClientIP6, URL, Referer, URLDomain FROM test.hits limit 0 - INSERT 
INTO table_{format} SELECT SearchPhrase, ClientIP6, URL, Referer, URLDomain FROM test.hits LIMIT 100000 SETTINGS engine_file_truncate_on_insert = 1 + INSERT INTO table_{format} SELECT SearchPhrase, ClientIP6, URL, Referer, URLDomain FROM test.hits LIMIT 100000 DROP TABLE IF EXISTS table_{format} diff --git a/tests/queries/0_stateless/00926_geo_to_h3.reference b/tests/queries/0_stateless/00926_geo_to_h3.reference index ad594f0e81f..074584ead16 100644 --- a/tests/queries/0_stateless/00926_geo_to_h3.reference +++ b/tests/queries/0_stateless/00926_geo_to_h3.reference @@ -4,12 +4,12 @@ 644325528491955313 644325528627451570 644325529094369568 -644325528491955313 +639821928864584823 644325528491955313 644325528491955313 644325528627451570 644325529094369568 -55.720762 37.598135 644325528491955313 +55.720762 37.598135 639821928864584823 55.720762 37.598135 644325528491955313 55.72076201 37.598135 644325528491955313 55.763241 37.660183 644325528627451570 diff --git a/tests/queries/0_stateless/00926_geo_to_h3.sql b/tests/queries/0_stateless/00926_geo_to_h3.sql index ed8e154fd9e..a86548d3555 100644 --- a/tests/queries/0_stateless/00926_geo_to_h3.sql +++ b/tests/queries/0_stateless/00926_geo_to_h3.sql @@ -11,9 +11,10 @@ INSERT INTO table1 VALUES(55.72076201, 37.59813500, 15); INSERT INTO table1 VALUES(55.72076200, 37.59813500, 14); select geoToH3(37.63098076, 55.77922738, 15); +select geoToH3(37.63098076, 55.77922738, 24); -- { serverError 69 } select geoToH3(lon, lat, resolution) from table1 order by lat, lon, resolution; -select geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; -select lat, lon, geoToH3(lon, lat, 15) AS k from table1 order by lat, lon, k; +select geoToH3(lon, lat, resolution) AS k from table1 order by lat, lon, k; +select lat, lon, geoToH3(lon, lat, resolution) AS k from table1 order by lat, lon, k; select geoToH3(lon, lat, resolution) AS k, count(*) from table1 group by k order by k; DROP TABLE table1 diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index c18992583cd..06bd6ab04e4 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -49,13 +49,13 @@ CREATE TABLE [] TABLE CREATE CREATE VIEW [] VIEW CREATE CREATE DICTIONARY [] DICTIONARY CREATE CREATE TEMPORARY TABLE [] GLOBAL CREATE -CREATE FUNCTION [] DATABASE CREATE +CREATE FUNCTION [] GLOBAL CREATE CREATE [] \N ALL DROP DATABASE [] DATABASE DROP DROP TABLE [] TABLE DROP DROP VIEW [] VIEW DROP DROP DICTIONARY [] DICTIONARY DROP -DROP FUNCTION [] DATABASE DROP +DROP FUNCTION [] GLOBAL DROP DROP [] \N ALL TRUNCATE ['TRUNCATE TABLE'] TABLE ALL OPTIMIZE ['OPTIMIZE TABLE'] TABLE ALL @@ -68,9 +68,9 @@ CREATE ROLE [] GLOBAL ACCESS MANAGEMENT ALTER ROLE [] GLOBAL ACCESS MANAGEMENT DROP ROLE [] GLOBAL ACCESS MANAGEMENT ROLE ADMIN [] GLOBAL ACCESS MANAGEMENT -CREATE ROW POLICY ['CREATE POLICY'] GLOBAL ACCESS MANAGEMENT -ALTER ROW POLICY ['ALTER POLICY'] GLOBAL ACCESS MANAGEMENT -DROP ROW POLICY ['DROP POLICY'] GLOBAL ACCESS MANAGEMENT +CREATE ROW POLICY ['CREATE POLICY'] TABLE ACCESS MANAGEMENT +ALTER ROW POLICY ['ALTER POLICY'] TABLE ACCESS MANAGEMENT +DROP ROW POLICY ['DROP POLICY'] TABLE ACCESS MANAGEMENT CREATE QUOTA [] GLOBAL ACCESS MANAGEMENT ALTER QUOTA [] GLOBAL ACCESS MANAGEMENT DROP QUOTA [] GLOBAL ACCESS MANAGEMENT @@ -79,7 +79,7 @@ ALTER SETTINGS PROFILE ['ALTER PROFILE'] GLOBAL ACCESS MANAGEMENT DROP SETTINGS PROFILE ['DROP PROFILE'] GLOBAL ACCESS 
MANAGEMENT SHOW USERS ['SHOW CREATE USER'] GLOBAL SHOW ACCESS SHOW ROLES ['SHOW CREATE ROLE'] GLOBAL SHOW ACCESS -SHOW ROW POLICIES ['SHOW POLICIES','SHOW CREATE ROW POLICY','SHOW CREATE POLICY'] GLOBAL SHOW ACCESS +SHOW ROW POLICIES ['SHOW POLICIES','SHOW CREATE ROW POLICY','SHOW CREATE POLICY'] TABLE SHOW ACCESS SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS SHOW ACCESS [] \N ACCESS MANAGEMENT diff --git a/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference new file mode 100644 index 00000000000..993dd9b1cde --- /dev/null +++ b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference @@ -0,0 +1,11 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh new file mode 100755 index 00000000000..61244b80cf6 --- /dev/null +++ b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# default values test +${CLICKHOUSE_CLIENT} --query "SELECT 1" + +# backward compatibility test +${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}" --port "${CLICKHOUSE_PORT_TCP}" --query "SELECT 1"; + +not_resolvable_host="notlocalhost" +exception_msg="Cannot resolve host (${not_resolvable_host}), error 0: ${not_resolvable_host}. +Code: 198. DB::Exception: Not found address of host: ${not_resolvable_host}. (DNS_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}" "${not_resolvable_host}" --query "SELECT 1" 2>&1 > /dev/null)"; +[ "${error}" == "${exception_msg}" ]; echo "$?" + +not_number_port="abc" +exception_msg="Bad arguments: the argument ('${CLICKHOUSE_HOST}:${not_number_port}') for option '--host' is invalid." +error="$(${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}:${not_number_port}" --query "SELECT 1" 2>&1 > /dev/null)"; +[ "${error}" == "${exception_msg}" ]; echo "$?" + +not_alive_host="10.100.0.0" +${CLICKHOUSE_CLIENT} --host "${not_alive_host}" "${CLICKHOUSE_HOST}" --query "SELECT 1"; + +not_alive_port="1" +exception_msg="Code: 210. DB::NetException: Connection refused (${CLICKHOUSE_HOST}:${not_alive_port}). (NETWORK_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}" --port "${not_alive_port}" --query "SELECT 1" 2>&1 > /dev/null)" +[ "${error}" == "${exception_msg}" ]; echo "$?" +${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}:${not_alive_port}" "${CLICKHOUSE_HOST}" --query "SELECT 1"; +${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_TCP}" --port "${not_alive_port}" --query "SELECT 1"; + +ipv6_host_without_brackets="2001:3984:3989::1:1000" +exception_msg="Code: 210. DB::NetException: Connection refused (${ipv6_host_without_brackets}). (NETWORK_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${ipv6_host_without_brackets}" --query "SELECT 1" 2>&1 > /dev/null)" +[ "${error}" == "${exception_msg}" ]; echo "$?" + +ipv6_host_with_brackets="[2001:3984:3989::1:1000]" +exception_msg="Code: 210. DB::NetException: Connection refused (${ipv6_host_with_brackets}). 
(NETWORK_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${ipv6_host_with_brackets}" --query "SELECT 1" 2>&1 > /dev/null)" +[ "${error}" == "${exception_msg}" ]; echo "$?" + +exception_msg="Code: 210. DB::NetException: Connection refused (${ipv6_host_with_brackets}:${not_alive_port}). (NETWORK_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${ipv6_host_with_brackets}:${not_alive_port}" --query "SELECT 1" 2>&1 > /dev/null)" +[ "${error}" == "${exception_msg}" ]; echo "$?" diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 82faf3b21ed..678fe35fd96 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -1,5 +1,5 @@ CREATE TABLE system.aggregate_function_combinators\n(\n `name` String,\n `is_internal` UInt8\n)\nENGINE = SystemAggregateFunctionCombinators()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.asynchronous_inserts\n(\n `query` String,\n `database` String,\n `table` String,\n `format` String,\n `first_update` DateTime64(6),\n `last_update` DateTime64(6),\n `total_bytes` UInt64,\n `entries.query_id` Array(String),\n `entries.bytes` Array(UInt64),\n `entries.finished` Array(UInt8),\n `entries.exception` Array(String)\n)\nENGINE = AsynchronousInserts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.asynchronous_inserts\n(\n `query` String,\n `database` String,\n `table` String,\n `format` String,\n `first_update` DateTime64(6),\n `last_update` DateTime64(6),\n `total_bytes` UInt64,\n `entries.query_id` Array(String),\n `entries.bytes` Array(UInt64),\n `entries.finished` Array(UInt8),\n `entries.exception` Array(String)\n)\nENGINE = SystemAsynchronousInserts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.asynchronous_metrics\n(\n `metric` String,\n `value` Float64\n)\nENGINE = SystemAsynchronousMetrics()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.build_options\n(\n `name` String,\n `value` String\n)\nENGINE = SystemBuildOptions()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.clusters\n(\n `cluster` String,\n `shard_num` UInt32,\n `shard_weight` UInt32,\n `replica_num` UInt32,\n `host_name` String,\n `host_address` String,\n `port` UInt16,\n `is_local` UInt8,\n `user` String,\n `default_database` String,\n `errors_count` UInt32,\n `slowdowns_count` UInt32,\n `estimated_recovery_time` UInt32\n)\nENGINE = SystemClusters()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' diff --git a/tests/queries/0_stateless/02154_dictionary_get_http_json.reference b/tests/queries/0_stateless/02154_dictionary_get_http_json.reference new file mode 100644 index 00000000000..7106f551cd7 --- /dev/null +++ b/tests/queries/0_stateless/02154_dictionary_get_http_json.reference @@ -0,0 +1,24 @@ +0 Value +{ + "meta": + [ + { + "name": "dictGet(02154_test_dictionary, 'value', toUInt64(0))", + "type": "String" + }, + { + "name": "dictGet(02154_test_dictionary, 'value', toUInt64(1))", + "type": "String" + } + ], + + "data": + [ + { + "dictGet(02154_test_dictionary, 'value', toUInt64(0))": "Value", + "dictGet(02154_test_dictionary, 'value', toUInt64(1))": "" + } + ], + + "rows": 1 +} diff --git a/tests/queries/0_stateless/02154_dictionary_get_http_json.sh b/tests/queries/0_stateless/02154_dictionary_get_http_json.sh new file mode 100755 index 00000000000..a2bce866c76 --- /dev/null +++ 
b/tests/queries/0_stateless/02154_dictionary_get_http_json.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS 02154_test_source_table" + +$CLICKHOUSE_CLIENT -q """ + CREATE TABLE 02154_test_source_table + ( + id UInt64, + value String + ) ENGINE=TinyLog; +""" + +$CLICKHOUSE_CLIENT -q "INSERT INTO 02154_test_source_table VALUES (0, 'Value')" +$CLICKHOUSE_CLIENT -q "SELECT * FROM 02154_test_source_table" + +$CLICKHOUSE_CLIENT -q "DROP DICTIONARY IF EXISTS 02154_test_dictionary" +$CLICKHOUSE_CLIENT -q """ + CREATE DICTIONARY 02154_test_dictionary + ( + id UInt64, + value String + ) + PRIMARY KEY id + LAYOUT(HASHED()) + LIFETIME(0) + SOURCE(CLICKHOUSE(TABLE '02154_test_source_table')) +""" + +echo """ + SELECT dictGet(02154_test_dictionary, 'value', toUInt64(0)), dictGet(02154_test_dictionary, 'value', toUInt64(1)) + FORMAT JSON +""" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&wait_end_of_query=1&output_format_write_statistics=0" -d @- + +$CLICKHOUSE_CLIENT -q "DROP DICTIONARY 02154_test_dictionary" +$CLICKHOUSE_CLIENT -q "DROP TABLE 02154_test_source_table" diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache.reference b/tests/queries/0_stateless/02174_cte_scalar_cache.reference new file mode 100644 index 00000000000..88456b1e7ea --- /dev/null +++ b/tests/queries/0_stateless/02174_cte_scalar_cache.reference @@ -0,0 +1,2 @@ +02177_CTE_GLOBAL_ON 5 500 11 0 5 +02177_CTE_GLOBAL_OFF 1 100 5 0 1 diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache.sql b/tests/queries/0_stateless/02174_cte_scalar_cache.sql new file mode 100644 index 00000000000..4b015cdd007 --- /dev/null +++ b/tests/queries/0_stateless/02174_cte_scalar_cache.sql @@ -0,0 +1,48 @@ +WITH + ( SELECT sleep(0.0001) FROM system.one ) as a1, + ( SELECT sleep(0.0001) FROM system.one ) as a2, + ( SELECT sleep(0.0001) FROM system.one ) as a3, + ( SELECT sleep(0.0001) FROM system.one ) as a4, + ( SELECT sleep(0.0001) FROM system.one ) as a5 +SELECT '02177_CTE_GLOBAL_ON', a5 FROM system.numbers LIMIT 100 +FORMAT Null +SETTINGS enable_global_with_statement = 1; + +WITH + ( SELECT sleep(0.0001) FROM system.one ) as a1, + ( SELECT sleep(0.0001) FROM system.one ) as a2, + ( SELECT sleep(0.0001) FROM system.one ) as a3, + ( SELECT sleep(0.0001) FROM system.one ) as a4, + ( SELECT sleep(0.0001) FROM system.one ) as a5 +SELECT '02177_CTE_GLOBAL_OFF', a5 FROM system.numbers LIMIT 100 + FORMAT Null +SETTINGS enable_global_with_statement = 0; + +SYSTEM FLUSH LOGS; +SELECT + '02177_CTE_GLOBAL_ON', + ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds, + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '%SELECT ''02177_CTE_GLOBAL_ON%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + +SELECT + '02177_CTE_GLOBAL_OFF', + ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds, + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + 
ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '%02177_CTE_GLOBAL_OFF%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference new file mode 100644 index 00000000000..246706164df --- /dev/null +++ b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference @@ -0,0 +1,63 @@ +4 4 4 4 5 +9 9 9 9 5 +14 14 14 14 5 +19 19 19 19 5 +24 24 24 24 5 +29 29 29 29 5 +34 34 34 34 5 +39 39 39 39 5 +44 44 44 44 5 +49 49 49 49 5 +54 54 54 54 5 +59 59 59 59 5 +64 64 64 64 5 +69 69 69 69 5 +74 74 74 74 5 +79 79 79 79 5 +84 84 84 84 5 +89 89 89 89 5 +94 94 94 94 5 +99 99 99 99 5 +02177_MV 7 80 22 +10 +40 +70 +100 +130 +160 +190 +220 +250 +280 +310 +340 +370 +400 +430 +460 +490 +520 +550 +580 +02177_MV_2 0 0 21 +8 +18 +28 +38 +48 +58 +68 +78 +88 +98 +108 +118 +128 +138 +148 +158 +168 +178 +188 +198 +02177_MV_3 19 0 2 diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql new file mode 100644 index 00000000000..4d4447c7f31 --- /dev/null +++ b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql @@ -0,0 +1,133 @@ +-- TEST CACHE +CREATE TABLE t1 (i Int64, j Int64) ENGINE = Memory; +INSERT INTO t1 SELECT number, number FROM system.numbers LIMIT 100; +CREATE TABLE t2 (k Int64, l Int64, m Int64, n Int64) ENGINE = Memory; + +CREATE MATERIALIZED VIEW mv1 TO t2 AS + WITH + (SELECT max(i) FROM t1) AS t1 + SELECT + t1 as k, -- Using local cache x 4 + t1 as l, + t1 as m, + t1 as n + FROM t1 + LIMIT 5; + +-- FIRST INSERT +INSERT INTO t1 +WITH + (SELECT max(i) FROM t1) AS t1 +SELECT + number as i, + t1 + t1 + t1 AS j -- Using global cache +FROM system.numbers +LIMIT 100 +SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5, + max_threads=1; + +SELECT k, l, m, n, count() +FROM t2 +GROUP BY k, l, m, n +ORDER BY k, l, m, n; + +SYSTEM FLUSH LOGS; +-- The main query should have a cache miss and 3 global hits +-- The MV is executed 20 times (100 / 5) and each run does 1 miss and 4 hits to the LOCAL cache +-- In addition to this, to prepare the MV, there is an extra preparation to get the list of columns via +-- InterpreterSelectQuery, which adds 1 miss and 4 global hits (since it uses the global cache) +-- So in total we have: +-- Main query: 1 miss, 3 global +-- Preparation: 1 miss, 4 global +-- Blocks (20): 20 miss, 0 global, 80 local hits + +-- TOTAL: 22 miss, 7 global, 80 local +SELECT + '02177_MV', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '-- FIRST INSERT\nINSERT INTO t1\n%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + +DROP TABLE mv1; + +CREATE TABLE t3 (z Int64) ENGINE = Memory; +CREATE MATERIALIZED VIEW mv2 TO t3 AS +SELECT + -- This includes an unnecessarily complex query to verify that the local cache is used (since it uses t1) + sum(i) + sum(j) + (SELECT * FROM (SELECT min(i) + min(j) FROM (SELECT * FROM system.one _a, t1 _b))) AS z +FROM t1; + +-- 
SECOND INSERT +INSERT INTO t1 +SELECT 0 as i, number as j from numbers(100) +SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5, + max_threads=1; + +SELECT * FROM t3 ORDER BY z ASC; +SYSTEM FLUSH LOGS; +SELECT + '02177_MV_2', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '-- SECOND INSERT\nINSERT INTO t1%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + +DROP TABLE mv2; + + +CREATE TABLE t4 (z Int64) ENGINE = Memory; +CREATE MATERIALIZED VIEW mv3 TO t4 AS +SELECT + -- This includes an unnecessarily complex query but now it uses t2 so it can be cached + min(i) + min(j) + (SELECT * FROM (SELECT min(k) + min(l) FROM (SELECT * FROM system.one _a, t2 _b))) AS z +FROM t1; + +-- THIRD INSERT +INSERT INTO t1 +SELECT number as i, number as j from numbers(100) + SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5, + max_threads=1; +SYSTEM FLUSH LOGS; + +SELECT * FROM t4 ORDER BY z ASC; + +SELECT + '02177_MV_3', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '-- THIRD INSERT\nINSERT INTO t1%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + +DROP TABLE mv3; +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; +DROP TABLE t4; diff --git a/tests/queries/0_stateless/02180_group_by_lowcardinality.reference b/tests/queries/0_stateless/02180_group_by_lowcardinality.reference new file mode 100644 index 00000000000..a7149390d1a --- /dev/null +++ b/tests/queries/0_stateless/02180_group_by_lowcardinality.reference @@ -0,0 +1,10 @@ +{"val":"1563.8","avg(toUInt32(val))":null} +{"val":"891.4","avg(toUInt32(val))":null} +{"val":"584.4","avg(toUInt32(val))":null} +{"val":"269","avg(toUInt32(val))":269} +{"val":"1233.4","avg(toUInt32(val))":null} +{"val":"1833","avg(toUInt32(val))":1833} +{"val":"1009.4","avg(toUInt32(val))":null} +{"val":"1178.6","avg(toUInt32(val))":null} +{"val":"372.6","avg(toUInt32(val))":null} +{"val":"232.4","avg(toUInt32(val))":null} diff --git a/tests/queries/0_stateless/02180_group_by_lowcardinality.sql b/tests/queries/0_stateless/02180_group_by_lowcardinality.sql new file mode 100644 index 00000000000..463753a624e --- /dev/null +++ b/tests/queries/0_stateless/02180_group_by_lowcardinality.sql @@ -0,0 +1,10 @@ +create table if not exists t_group_by_lowcardinality(p_date Date, val LowCardinality(Nullable(String))) +engine=MergeTree() partition by p_date order by tuple(); + +insert into t_group_by_lowcardinality select today() as p_date, toString(number/5) as val from numbers(10000); +insert into t_group_by_lowcardinality select today() as p_date, Null as val from numbers(100); + +select val, avg(toUInt32(val)) from t_group_by_lowcardinality group by val limit 10 settings max_threads=1, max_rows_to_group_by=100, group_by_overflow_mode='any' format JSONEachRow; + +drop table if exists 
t_group_by_lowcardinality; + diff --git a/tests/queries/0_stateless/02184_default_table_engine.reference b/tests/queries/0_stateless/02184_default_table_engine.reference new file mode 100644 index 00000000000..200578f3da9 --- /dev/null +++ b/tests/queries/0_stateless/02184_default_table_engine.reference @@ -0,0 +1,27 @@ +CREATE TABLE default.table_02184\n(\n `x` UInt8\n)\nENGINE = Log +CREATE TABLE default.table_02184\n(\n `x` UInt8\n)\nENGINE = MergeTree\nPRIMARY KEY x\nORDER BY x\nSETTINGS index_granularity = 8192 +CREATE TABLE default.test_optimize_exception\n(\n `date` Date\n)\nENGINE = MergeTree\nPARTITION BY toYYYYMM(date)\nORDER BY date\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_02184\n(\n `x` UInt8\n)\nENGINE = MergeTree\nORDER BY x\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_02184\n(\n `x` UInt8\n)\nENGINE = MergeTree\nPRIMARY KEY x\nORDER BY x\nSETTINGS index_granularity = 8192 +CREATE TABLE default.numbers1\n(\n `number` UInt64\n)\nENGINE = Memory +4.5 +45 +CREATE TABLE default.numbers2\n(\n `number` UInt64\n)\nENGINE = MergeTree\nORDER BY intHash32(number)\nSAMPLE BY intHash32(number)\nSETTINGS index_granularity = 8192 +45 +CREATE TABLE default.numbers3\n(\n `number` UInt64\n)\nENGINE = Log +CREATE MATERIALIZED VIEW default.test_view_filtered\n(\n `EventDate` Date,\n `CounterID` UInt32\n)\nENGINE = Memory AS\nSELECT\n CounterID,\n EventDate\nFROM default.test_table\nWHERE EventDate < \'2013-01-01\' +2014-01-02 0 0 1970-01-01 03:00:00 2014-01-02 03:04:06 +1 2014-01-02 07:04:06 +CREATE TABLE default.t1\n(\n `Rows` UInt64,\n `MaxHitTime` DateTime(\'UTC\')\n)\nENGINE = MergeTree\nORDER BY Rows\nSETTINGS index_granularity = 8192 +CREATE TABLE default.t2\n(\n `Rows` UInt64,\n `MaxHitTime` DateTime(\'UTC\')\n)\nENGINE = Memory +CREATE TABLE default.mt\n(\n `a` UInt64,\n `b` Nullable(String),\n INDEX b_index b TYPE set(123) GRANULARITY 1\n)\nENGINE = MergeTree\nPRIMARY KEY (a, coalesce(b, \'test\'))\nORDER BY (a, coalesce(b, \'test\'))\nSETTINGS index_granularity = 8192 +CREATE TABLE default.mt2\n(\n `a` UInt64,\n `b` Nullable(String),\n INDEX b_index b TYPE set(123) GRANULARITY 1\n)\nENGINE = MergeTree\nPRIMARY KEY (a, coalesce(b, \'test\'))\nORDER BY (a, coalesce(b, \'test\'))\nSETTINGS index_granularity = 8192 +CREATE TEMPORARY TABLE tmp\n(\n `n` Int32\n)\nENGINE = Memory +CREATE TABLE default.log\n(\n `n` Int32\n)\nENGINE = Log +CREATE TABLE default.log1\n(\n `n` Int32\n)\nENGINE = Log +CREATE TABLE default.mem\n(\n `n` Int32\n)\nENGINE = Memory +CREATE TABLE default.mt\n(\n `n` UInt8\n)\nENGINE = MergeTree\nORDER BY n\nSETTINGS index_granularity = 8192 +CREATE TABLE default.mem\n(\n `n` UInt8\n)\nENGINE = Memory +CREATE TABLE default.val\n(\n `n` Int32\n) AS values(\'n int\', 1, 2) +CREATE TABLE default.val2\n(\n `n` Int32\n) AS values(\'n int\', 1, 2) +CREATE TABLE default.log\n(\n `n` Int32\n)\nENGINE = Log diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql new file mode 100644 index 00000000000..d129ccc801e --- /dev/null +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -0,0 +1,120 @@ +CREATE TABLE table_02184 (x UInt8); --{serverError 119} +SET default_table_engine = 'Log'; +CREATE TABLE table_02184 (x UInt8); +SHOW CREATE TABLE table_02184; +DROP TABLE table_02184; + +SET default_table_engine = 'MergeTree'; +CREATE TABLE table_02184 (x UInt8); --{serverError 42} +CREATE TABLE table_02184 (x UInt8, PRIMARY KEY (x)); +SHOW CREATE TABLE 
table_02184; +DROP TABLE table_02184; + +CREATE TABLE test_optimize_exception (date Date) PARTITION BY toYYYYMM(date) ORDER BY date; +SHOW CREATE TABLE test_optimize_exception; +DROP TABLE test_optimize_exception; +CREATE TABLE table_02184 (x UInt8) PARTITION BY x; --{serverError 36} +CREATE TABLE table_02184 (x UInt8) ORDER BY x; +SHOW CREATE TABLE table_02184; +DROP TABLE table_02184; + +CREATE TABLE table_02184 (x UInt8) PRIMARY KEY x; +SHOW CREATE TABLE table_02184; +DROP TABLE table_02184; +SET default_table_engine = 'Memory'; +CREATE TABLE numbers1 AS SELECT number FROM numbers(10); +SHOW CREATE TABLE numbers1; +SELECT avg(number) FROM numbers1; +DROP TABLE numbers1; + +SET default_table_engine = 'MergeTree'; +CREATE TABLE numbers2 ORDER BY intHash32(number) SAMPLE BY intHash32(number) AS SELECT number FROM numbers(10); +SELECT sum(number) FROM numbers2; +SHOW CREATE TABLE numbers2; +DROP TABLE numbers2; + +CREATE TABLE numbers3 ENGINE = Log AS SELECT number FROM numbers(10); +SELECT sum(number) FROM numbers3; +SHOW CREATE TABLE numbers3; +DROP TABLE numbers3; + +CREATE TABLE test_table (EventDate Date, CounterID UInt32, UserID UInt64, EventTime DateTime('Europe/Moscow'), UTCEventTime DateTime('UTC')) PARTITION BY EventDate PRIMARY KEY CounterID; +SET default_table_engine = 'Memory'; +CREATE MATERIALIZED VIEW test_view (Rows UInt64, MaxHitTime DateTime('Europe/Moscow')) AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; +CREATE MATERIALIZED VIEW test_view_filtered (EventDate Date, CounterID UInt32) POPULATE AS SELECT CounterID, EventDate FROM test_table WHERE EventDate < '2013-01-01'; +SHOW CREATE TABLE test_view_filtered; +INSERT INTO test_table (EventDate, UTCEventTime) VALUES ('2014-01-02', '2014-01-02 03:04:06'); + +SELECT * FROM test_table; +SELECT * FROM test_view; +SELECT * FROM test_view_filtered; + +DROP TABLE test_view; +DROP TABLE test_view_filtered; + +SET default_table_engine = 'MergeTree'; +CREATE MATERIALIZED VIEW test_view ORDER BY Rows AS SELECT count() AS Rows, max(UTCEventTime) AS MaxHitTime FROM test_table; +SET default_table_engine = 'Memory'; +CREATE TABLE t1 AS test_view; +CREATE TABLE t2 ENGINE=Memory AS test_view; +SHOW CREATE TABLE t1; +SHOW CREATE TABLE t2; +DROP TABLE test_view; +DROP TABLE test_table; +DROP TABLE t1; +DROP TABLE t2; + + +CREATE DATABASE test_02184 ORDER BY kek; -- {serverError 80} +CREATE DATABASE test_02184 SETTINGS x=1; -- {serverError 80} +CREATE TABLE table_02184 (x UInt8, y int, PRIMARY KEY (x)) ENGINE=MergeTree PRIMARY KEY y; -- {clientError 36} +SET default_table_engine = 'MergeTree'; +CREATE TABLE table_02184 (x UInt8, y int, PRIMARY KEY (x)) PRIMARY KEY y; -- {clientError 36} + +CREATE TABLE mt (a UInt64, b Nullable(String), PRIMARY KEY (a, coalesce(b, 'test')), INDEX b_index b TYPE set(123) GRANULARITY 1); +SHOW CREATE TABLE mt; +SET default_table_engine = 'Log'; +CREATE TABLE mt2 AS mt; +SHOW CREATE TABLE mt2; +DROP TABLE mt; + +SET default_table_engine = 'Log'; +CREATE TEMPORARY TABLE tmp (n int); +SHOW CREATE TEMPORARY TABLE tmp; +CREATE TEMPORARY TABLE tmp1 (n int) ENGINE=Memory; +CREATE TEMPORARY TABLE tmp2 (n int) ENGINE=Log; -- {serverError 80} +CREATE TEMPORARY TABLE tmp2 (n int) ORDER BY n; -- {serverError 80} +CREATE TEMPORARY TABLE tmp2 (n int, PRIMARY KEY (n)); -- {serverError 80} + +CREATE TABLE log (n int); +SHOW CREATE log; +SET default_table_engine = 'MergeTree'; +CREATE TABLE log1 AS log; +SHOW CREATE log1; +CREATE TABLE mem AS log1 ENGINE=Memory; +SHOW CREATE mem; +DROP TABLE log; +DROP 
TABLE log1; +DROP TABLE mem; + +SET default_table_engine = 'None'; +CREATE TABLE mem AS SELECT 1 as n; --{serverError 119} +SET default_table_engine = 'Memory'; +CREATE TABLE mem ORDER BY n AS SELECT 1 as n; -- {serverError 36} +SET default_table_engine = 'MergeTree'; +CREATE TABLE mt ORDER BY n AS SELECT 1 as n; +CREATE TABLE mem ENGINE=Memory AS SELECT 1 as n; +SHOW CREATE TABLE mt; +SHOW CREATE TABLE mem; +DROP TABLE mt; +DROP TABLE mem; + +CREATE TABLE val AS values('n int', 1, 2); +CREATE TABLE val2 AS val; +CREATE TABLE log ENGINE=Log AS val; +SHOW CREATE TABLE val; +SHOW CREATE TABLE val2; +SHOW CREATE TABLE log; +DROP TABLE val; +DROP TABLE val2; +DROP TABLE log; diff --git a/tests/queries/0_stateless/02184_table_engine_access.reference b/tests/queries/0_stateless/02184_table_engine_access.reference new file mode 100644 index 00000000000..99a67d4daa4 --- /dev/null +++ b/tests/queries/0_stateless/02184_table_engine_access.reference @@ -0,0 +1,2 @@ +ACCESS_DENIED +CREATE TABLE default.t\n(\n `line` String\n)\nENGINE = URL(\'https://clickhouse.com\', \'LineAsString\') diff --git a/tests/queries/0_stateless/02184_table_engine_access.sh b/tests/queries/0_stateless/02184_table_engine_access.sh new file mode 100755 index 00000000000..dbbf28e46d4 --- /dev/null +++ b/tests/queries/0_stateless/02184_table_engine_access.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Tags: no-parallel +# Tag no-parallel: create user + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS user_test_02184;" +$CLICKHOUSE_CLIENT --query "CREATE USER user_test_02184 IDENTIFIED WITH plaintext_password BY 'user_test_02184';" +${CLICKHOUSE_CLIENT} -q "REVOKE ALL ON *.* FROM user_test_02184" + +$CLICKHOUSE_CLIENT --query "GRANT CREATE ON *.* TO user_test_02184;" + +$CLICKHOUSE_CLIENT --query "CREATE TABLE url ENGINE=URL('https://clickhouse.com', LineAsString)" + +$CLICKHOUSE_CLIENT --user=user_test_02184 --password=user_test_02184 --query "CREATE TABLE t AS url" 2>&1| grep -Fo "ACCESS_DENIED" | uniq + +$CLICKHOUSE_CLIENT --query "GRANT URL ON *.* TO user_test_02184;" +$CLICKHOUSE_CLIENT --user=user_test_02184 --password=user_test_02184 --query "CREATE TABLE t AS url" +$CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE t" +$CLICKHOUSE_CLIENT --query "DROP TABLE t" +$CLICKHOUSE_CLIENT --query "DROP TABLE url" diff --git a/tests/queries/0_stateless/02187_insert_values_with_mv.reference b/tests/queries/0_stateless/02187_insert_values_with_mv.reference new file mode 100644 index 00000000000..d24ee4faff7 --- /dev/null +++ b/tests/queries/0_stateless/02187_insert_values_with_mv.reference @@ -0,0 +1,12 @@ +VALUES 1 +TABLE 1 +VALUES 1 +VALUES 1 +VALUES 1 +VALUES 1 +VALUES 1 +TABLE 1 +TABLE 1 +TABLE 1 +TABLE 1 +TABLE 1 diff --git a/tests/queries/0_stateless/02187_insert_values_with_mv.sql b/tests/queries/0_stateless/02187_insert_values_with_mv.sql new file mode 100644 index 00000000000..91241f7f5c3 --- /dev/null +++ b/tests/queries/0_stateless/02187_insert_values_with_mv.sql @@ -0,0 +1,59 @@ +CREATE TABLE IF NOT EXISTS a (a Int64) ENGINE=Memory; +CREATE TABLE IF NOT EXISTS b (a Int64) ENGINE=Memory; +CREATE MATERIALIZED VIEW IF NOT EXISTS mv1 TO b AS Select sleepEachRow(0.05) as a FROM a; +CREATE MATERIALIZED VIEW IF NOT EXISTS mv2 TO b AS Select sleepEachRow(0.05) as a FROM a; +CREATE MATERIALIZED VIEW IF NOT EXISTS mv3 TO b AS Select sleepEachRow(0.05) as a FROM a; +CREATE MATERIALIZED VIEW IF NOT EXISTS mv4 TO b 
AS Select sleepEachRow(0.05) as a FROM a; +CREATE MATERIALIZED VIEW IF NOT EXISTS mv5 TO b AS Select sleepEachRow(0.05) as a FROM a; + +-- INSERT USING VALUES +INSERT INTO a VALUES (1); +-- INSERT USING TABLE +INSERT INTO a SELECT * FROM system.one; +SYSTEM FLUSH LOGS; + +SELECT 'VALUES', query_duration_ms >= 250 +FROM system.query_log +WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND query LIKE '-- INSERT USING VALUES%' + AND type = 'QueryFinish' +LIMIT 1; + +SELECT 'TABLE', query_duration_ms >= 250 +FROM system.query_log +WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND query LIKE '-- INSERT USING TABLE%' + AND type = 'QueryFinish' +LIMIT 1; + +WITH + ( + SELECT initial_query_id + FROM system.query_log + WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND query LIKE '-- INSERT USING VALUES%' + LIMIT 1 + ) AS q_id +SELECT 'VALUES', view_duration_ms >= 50 +FROM system.query_views_log +WHERE initial_query_id = q_id; + +WITH +( + SELECT initial_query_id + FROM system.query_log + WHERE + current_database = currentDatabase() + AND event_date >= yesterday() + AND query LIKE '-- INSERT USING TABLE%' + LIMIT 1 +) AS q_id +SELECT 'TABLE', view_duration_ms >= 50 +FROM system.query_views_log +WHERE initial_query_id = q_id; diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.python b/tests/queries/0_stateless/02205_HTTP_user_agent.python new file mode 100644 index 00000000000..8fb9cea0845 --- /dev/null +++ b/tests/queries/0_stateless/02205_HTTP_user_agent.python @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +from http.server import SimpleHTTPRequestHandler,HTTPServer +import socket +import sys +import threading +import os +import traceback +import urllib.request +import subprocess + + +def is_ipv6(host): + try: + socket.inet_aton(host) + return False + except: + return True + +def get_local_port(host, ipv6): + if ipv6: + family = socket.AF_INET6 + else: + family = socket.AF_INET + + with socket.socket(family) as fd: + fd.bind((host, 0)) + return fd.getsockname()[1] + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost') +CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +# Server returns this JSON response. +SERVER_JSON_RESPONSE = \ '''{ + "login": "ClickHouse", + "id": 54801242, + "name": "ClickHouse", + "company": null +}''' + +EXPECTED_ANSWER = \ '''{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}''' + +##################################################################################### +# This test starts an HTTP server and serves data to clickhouse url-engine based table. +# The objective of this test is to check the ClickHouse server provides a User-Agent +# with HTTP requests. +# In order for it to work ip+port of http server (given below) should be +# accessible from clickhouse server. +##################################################################################### + +# IP-address of this host accessible from the outside world. Get the first one +HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) +HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) + +# IP address and port of the HTTP server started from this script. 
+HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) +if IS_IPV6: + HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" +else: + HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + + +def get_ch_answer(query): + host = CLICKHOUSE_HOST + if IS_IPV6: + host = f'[{host}]' + + url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=host, port=CLICKHOUSE_PORT_HTTP)) + return urllib.request.urlopen(url, data=query.encode()).read().decode() + +def check_answers(query, answer): + ch_answer = get_ch_answer(query) + if ch_answer.strip() != answer.strip(): + print("FAIL on query:", query, file=sys.stderr) + print("Expected answer:", answer, file=sys.stderr) + print("Fetched answer :", ch_answer, file=sys.stderr) + raise Exception("Fail on query") + +# Server with check for User-Agent headers. +class HttpProcessor(SimpleHTTPRequestHandler): + def _set_headers(self): + user_agent = self.headers.get('User-Agent') + if user_agent and user_agent.startswith('ClickHouse/'): + self.send_response(200) + else: + self.send_response(403) + + self.send_header('Content-Type', 'text/csv') + self.end_headers() + + def do_GET(self): + self._set_headers() + self.wfile.write(SERVER_JSON_RESPONSE.encode()) + + def log_message(self, format, *args): + return + +class HTTPServerV6(HTTPServer): + address_family = socket.AF_INET6 + +def start_server(requests_amount): + if IS_IPV6: + httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) + else: + httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor) + + def real_func(): + for i in range(requests_amount): + httpd.handle_request() + + t = threading.Thread(target=real_func) + return t + +##################################################################### +# Testing area. +##################################################################### + +def test_select(): + global HTTP_SERVER_URL_STR + query = 'SELECT * FROM url(\'{}\',\'JSONAsString\');'.format(HTTP_SERVER_URL_STR) + check_answers(query, EXPECTED_ANSWER) + +def main(): + t = start_server(1) + t.start() + test_select() + t.join() + print("PASSED") + +if __name__ == "__main__": + try: + main() + except Exception as ex: + exc_type, exc_value, exc_traceback = sys.exc_info() + traceback.print_tb(exc_traceback, file=sys.stderr) + print(ex, file=sys.stderr) + sys.stderr.flush() + + os._exit(1) + diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.reference b/tests/queries/0_stateless/02205_HTTP_user_agent.reference new file mode 100644 index 00000000000..53cdf1e9393 --- /dev/null +++ b/tests/queries/0_stateless/02205_HTTP_user_agent.reference @@ -0,0 +1 @@ +PASSED diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.sh b/tests/queries/0_stateless/02205_HTTP_user_agent.sh new file mode 100755 index 00000000000..b125e91ae85 --- /dev/null +++ b/tests/queries/0_stateless/02205_HTTP_user_agent.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +python3 "$CURDIR"/02205_HTTP_user_agent.python + diff --git a/tests/queries/0_stateless/02207_key_condition_floats.reference b/tests/queries/0_stateless/02207_key_condition_floats.reference new file mode 100644 index 00000000000..6c78023f8c0 --- /dev/null +++ b/tests/queries/0_stateless/02207_key_condition_floats.reference @@ -0,0 +1,9 @@ +2 +2 +2 +2 +2 +2 +2 +2 +1 diff --git a/tests/queries/0_stateless/02207_key_condition_floats.sql b/tests/queries/0_stateless/02207_key_condition_floats.sql new file mode 100644 index 00000000000..65527c65290 --- /dev/null +++ b/tests/queries/0_stateless/02207_key_condition_floats.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS t_key_condition_float; + +CREATE TABLE t_key_condition_float (a Float32) +ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_key_condition_float VALUES (0.1), (0.2); + +SELECT count() FROM t_key_condition_float WHERE a > 0; +SELECT count() FROM t_key_condition_float WHERE a > 0.0; +SELECT count() FROM t_key_condition_float WHERE a > 0::Float32; +SELECT count() FROM t_key_condition_float WHERE a > 0::Float64; + +DROP TABLE t_key_condition_float; + +CREATE TABLE t_key_condition_float (a Float64) +ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_key_condition_float VALUES (0.1), (0.2); + +SELECT count() FROM t_key_condition_float WHERE a > 0; +SELECT count() FROM t_key_condition_float WHERE a > 0.0; +SELECT count() FROM t_key_condition_float WHERE a > 0::Float32; +SELECT count() FROM t_key_condition_float WHERE a > 0::Float64; + +DROP TABLE t_key_condition_float; + +CREATE TABLE t_key_condition_float (a UInt64) +ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_key_condition_float VALUES (1), (2); + +SELECT count() FROM t_key_condition_float WHERE a > 1.5; + +DROP TABLE t_key_condition_float; diff --git a/tests/queries/0_stateless/02209_short_circuit_node_without_parents.reference b/tests/queries/0_stateless/02209_short_circuit_node_without_parents.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02209_short_circuit_node_without_parents.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02209_short_circuit_node_without_parents.sql b/tests/queries/0_stateless/02209_short_circuit_node_without_parents.sql new file mode 100644 index 00000000000..c20ca83591f --- /dev/null +++ b/tests/queries/0_stateless/02209_short_circuit_node_without_parents.sql @@ -0,0 +1,2 @@ +SELECT 1 FROM (SELECT arrayJoin(if(empty(range(number)), [1], [2])) from numbers(1)); + diff --git a/tests/queries/0_stateless/02210_append_to_dev_dull.reference b/tests/queries/0_stateless/02210_append_to_dev_dull.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02210_append_to_dev_dull.sql b/tests/queries/0_stateless/02210_append_to_dev_dull.sql new file mode 100644 index 00000000000..a8aaa2f05ab --- /dev/null +++ b/tests/queries/0_stateless/02210_append_to_dev_dull.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest + +insert into table function file('/dev/null', 'Parquet', 'number UInt64') select * from numbers(10); +insert into table function file('/dev/null', 'ORC', 'number UInt64') select * from numbers(10); +insert into table function file('/dev/null', 'JSON', 'number UInt64') select * from numbers(10); + diff --git a/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference new file mode 100644 index 00000000000..2ac2f690f1b --- 
/dev/null +++ b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference @@ -0,0 +1 @@ +Const(UInt8) diff --git a/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.sql b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.sql new file mode 100644 index 00000000000..a71c3f30604 --- /dev/null +++ b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.sql @@ -0,0 +1 @@ +SELECT toColumnTypeName(toLowCardinality(1)); diff --git a/tests/queries/0_stateless/02211_jsonl_format_extension.reference b/tests/queries/0_stateless/02211_jsonl_format_extension.reference new file mode 100644 index 00000000000..8b1acc12b63 --- /dev/null +++ b/tests/queries/0_stateless/02211_jsonl_format_extension.reference @@ -0,0 +1,10 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/02211_jsonl_format_extension.sql b/tests/queries/0_stateless/02211_jsonl_format_extension.sql new file mode 100644 index 00000000000..08fff5a11f5 --- /dev/null +++ b/tests/queries/0_stateless/02211_jsonl_format_extension.sql @@ -0,0 +1,3 @@ +-- Tags: no-fasttest +insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10); +select * from file('data.jsonl'); diff --git a/uncrustify.cfg b/uncrustify.cfg deleted file mode 100644 index 9fefc1270de..00000000000 --- a/uncrustify.cfg +++ /dev/null @@ -1,252 +0,0 @@ -# Configuration file for Uncrustify code formatter. -# https://github.com/uncrustify/uncrustify -# -# Created with https://cdanu.github.io/uncrustify_config_preview/index.html -# -# You may apply it for your code with: -# uncrustify -l CPP -c uncrustify.cfg -f filename.cpp -# -# This config is in beta: it doesn't implement our style guide perfectly. -# It's not recommended to apply it for existing code base. 
- -newlines = lf -input_tab_size = 4 -output_tab_size = 4 -string_replace_tab_chars = true -utf8_bom = remove -utf8_byte = true -utf8_force = true -sp_arith = force -sp_assign = force -sp_cpp_lambda_assign = remove -sp_cpp_lambda_paren = remove -sp_assign_default = force -sp_enum_assign = force -sp_enum_colon = force -sp_pp_concat = force -sp_pp_stringify = remove -sp_bool = force -sp_compare = force -sp_inside_paren = remove -sp_paren_paren = remove -sp_paren_brace = force -sp_before_ptr_star = force -sp_between_ptr_star = remove -sp_after_ptr_star = force -sp_after_ptr_star_qualifier = force -sp_after_ptr_star_func = force -sp_ptr_star_paren = force -sp_before_ptr_star_func = force -sp_before_byref = force -sp_before_unnamed_byref = force -sp_after_byref = force -sp_after_byref_func = force -sp_before_byref_func = force -sp_template_angle = force -sp_before_angle = remove -sp_inside_angle = remove -sp_angle_colon = force -sp_after_angle = force -sp_angle_paren = remove -sp_angle_paren_empty = remove -sp_angle_word = force -sp_angle_shift = remove -sp_permit_cpp11_shift = true -sp_before_sparen = force -sp_inside_sparen = remove -sp_after_sparen = force -sp_sparen_brace = force -sp_special_semi = force -sp_before_semi_for = remove -sp_before_semi_for_empty = remove -sp_after_semi = force -sp_after_semi_for_empty = remove -sp_before_square = remove -sp_before_squares = remove -sp_inside_square = remove -sp_after_comma = force -sp_before_ellipsis = remove -sp_after_class_colon = force -sp_before_class_colon = force -sp_after_constr_colon = force -sp_before_constr_colon = force -sp_after_operator = remove -sp_after_operator_sym = remove -sp_after_cast = remove -sp_inside_paren_cast = remove -sp_cpp_cast_paren = remove -sp_sizeof_paren = remove -sp_inside_braces_enum = force -sp_inside_braces_struct = force -sp_inside_braces = force -sp_inside_braces_empty = remove -sp_type_func = force -sp_func_proto_paren = remove -sp_func_proto_paren_empty = remove -sp_func_def_paren = remove -sp_func_def_paren_empty = remove -sp_inside_fparens = remove -sp_inside_fparen = remove -sp_inside_tparen = remove -sp_after_tparen_close = remove -sp_square_fparen = remove -sp_fparen_brace = force -sp_func_call_paren = remove -sp_func_class_paren = remove -sp_func_class_paren_empty = remove -sp_return_paren = force -sp_attribute_paren = remove -sp_defined_paren = remove -sp_throw_paren = force -sp_after_throw = force -sp_catch_paren = force -sp_macro = add -sp_macro_func = add -sp_else_brace = force -sp_brace_else = force -sp_brace_typedef = force -sp_catch_brace = force -sp_brace_catch = force -sp_try_brace = force -sp_word_brace = remove -sp_word_brace_ns = force -sp_before_dc = remove -sp_after_dc = remove -sp_cond_colon = force -sp_cond_colon_before = force -sp_cond_colon_after = force -sp_cond_question = force -sp_cond_question_before = force -sp_cond_question_after = force -sp_cond_ternary_short = remove -sp_cmt_cpp_start = force -sp_cmt_cpp_doxygen = true -sp_cmt_cpp_qttr = true -sp_endif_cmt = force -sp_after_new = force -sp_between_new_paren = remove -sp_after_newop_paren = force -sp_inside_newop_paren = remove -sp_before_tr_emb_cmt = force -indent_columns = 4 -indent_with_tabs = 0 -indent_namespace = false -indent_namespace_limit = 100 -indent_class = true -indent_ctor_init_leading = 1 -indent_shift = true -indent_func_call_param = true -indent_func_def_param = true -indent_func_proto_param = true -indent_func_class_param = true -indent_func_ctor_var_param = true -indent_template_param = true 
-indent_member = 4 -indent_switch_case = 4 -indent_switch_pp = false -indent_label = 0 -indent_access_spec = -4 -indent_paren_close = 2 -indent_paren_after_func_def = true -indent_paren_after_func_decl = true -indent_paren_after_func_call = true -indent_align_assign = false -indent_token_after_brace = false -indent_cpp_lambda_body = true -indent_ternary_operator = 1 -nl_assign_leave_one_liners = true -nl_class_leave_one_liners = true -nl_enum_leave_one_liners = true -nl_getset_leave_one_liners = true -nl_func_leave_one_liners = true -nl_cpp_lambda_leave_one_liners = true -nl_cpp_ldef_brace = add -nl_if_leave_one_liners = true -nl_start_of_file = remove -nl_end_of_file = force -nl_enum_brace = add -nl_struct_brace = add -nl_union_brace = add -nl_if_brace = add -nl_brace_else = add -nl_else_brace = add -nl_else_if = remove -nl_before_if_closing_paren = remove -nl_try_brace = add -nl_for_brace = add -nl_catch_brace = add -nl_brace_catch = add -nl_while_brace = add -nl_do_brace = add -nl_brace_while = remove -nl_switch_brace = add -nl_multi_line_define = true -nl_before_case = true -nl_after_case = true -nl_case_colon_brace = add -nl_namespace_brace = add -nl_template_class = add -nl_class_brace = add -nl_enum_own_lines = add -nl_func_scope_name = remove -nl_func_paren = remove -nl_func_def_paren = remove -nl_func_call_paren = remove -nl_func_call_paren_empty = remove -nl_func_decl_start_multi_line = true -nl_func_def_start_multi_line = true -nl_func_decl_args_multi_line = true -nl_func_def_args_multi_line = true -nl_func_decl_end = remove -nl_func_def_end = remove -nl_func_decl_empty = remove -nl_func_def_empty = remove -nl_func_call_empty = remove -nl_func_call_start_multi_line = true -nl_func_call_args_multi_line = true -nl_fdef_brace = add -nl_after_semicolon = true -nl_constr_colon = force -nl_split_if_one_liner = true -nl_split_for_one_liner = true -nl_split_while_one_liner = true -nl_max = 3 -nl_max_blank_in_func = 2 -nl_after_func_proto = 1 -nl_after_func_proto_group = 2 -nl_after_func_class_proto = 1 -nl_after_func_class_proto_group = 2 -nl_before_func_body_def = 1 -nl_before_func_body_proto = 1 -nl_after_func_body = 3 -nl_after_func_body_class = 3 -nl_after_func_body_one_liner = 1 -nl_after_multiline_comment = true -nl_after_struct = 3 -nl_before_class = 3 -nl_after_class = 3 -nl_before_access_spec = 2 -nl_after_access_spec = 1 -nl_after_try_catch_finally = 1 -eat_blanks_after_open_brace = true -eat_blanks_before_close_brace = true -nl_remove_extra_newlines = 1 -nl_after_return = true -pos_constr_comma = lead_break -pos_constr_colon = lead_force -code_width = 160 -ls_func_split_full = true -ls_code_width = true -align_left_shift = false -cmt_convert_tab_to_spaces = true -mod_full_brace_for = remove -mod_full_brace_if = remove -mod_full_brace_if_chain = true -mod_full_brace_while = remove -mod_paren_on_return = remove -mod_remove_extra_semicolon = true -mod_remove_empty_return = true -align_func_params = true -align_func_params_thresh = 140 -sp_inside_type_brace_init_lst = remove -nl_constr_init_args = add diff --git a/website/support/agreement/index.html b/website/support/agreement/index.html new file mode 100644 index 00000000000..59e5ca9ab75 --- /dev/null +++ b/website/support/agreement/index.html @@ -0,0 +1,27 @@ +{% set prefetch_items = [ + ('/docs/en/', 'document') +] %} + +{% extends "templates/base.html" %} + +{% block extra_meta %} + +{% include "templates/common_fonts.html" %} +{% endblock %} + +{% block nav %} + +{% include "templates/global/nav.html" %} + +{% endblock %} + 
+{% block content %} + +{% include "templates/support/agreement-hero.html" %} + +{% include "templates/support/agreement-content.html" %} + +{% include "templates/global/newsletter.html" %} +{% include "templates/global/github_stars.html" %} + +{% endblock %} diff --git a/website/templates/support/agreement-content.html b/website/templates/support/agreement-content.html new file mode 100644 index 00000000000..4ca64e69599 --- /dev/null +++ b/website/templates/support/agreement-content.html @@ -0,0 +1,120 @@ +
+
+ +

This ClickHouse Subscription Agreement, including all referenced URLs, which are incorporated herein by reference (collectively, this “Agreement”), is entered into as of the date on which an applicable Order Form is fully executed (“Effective Date”), by and between the ClickHouse entity ("ClickHouse") set forth on such Order Form, and the entity identified thereon as the “Customer” (“Customer”).

+ +

1. DEFINITIONS
Capitalized terms used herein have the meaning ascribed below, or where such terms are first used, as applicable.

+ +

1.1 "Affiliate" means, with respect to a party, any entity that controls, is controlled by, or which is under common control with, such party, where "control" means ownership of at least fifty percent (50%) of the outstanding voting shares of the entity, or the contractual right to establish policy for, and manage the operations of, the entity.

+ +

1.2 "Order Form" means an ordering document provided by ClickHouse pursuant to which Customer purchases Subscriptions under this Agreement.

+ +

1.3 "Qualifying PO" means a purchase order issued by Customer for the purpose of purchasing a Subscription, which (i) references the number of an applicable Order Form provided to Customer by ClickHouse and (ii) clearly states the purchase order is subject to the terms and conditions of this Agreement.

+ +

1.4 "Software" means the ClickHouse software of the same name that is licensed for use under the Apache 2.0 license.

+ +

1.5 "Subscription" means Customer's right, for a fixed period of time, to receive Support Services, as set forth in the applicable Order Form.

+ +

1.6 "Subscription Term" means the period of time for which a Subscription is valid, as further described in Section 7.1 of this Agreement.

+ +

1.7 "Support Services" means maintenance and support services for the Software, as more fully described in the Support Services Policy.

+ +

1.8 "Support Services Policy" means ClickHouse's support services policy as further described at https://clickhouse.com/support/policy/. ClickHouse reserves the right to reasonably modify the Support Services Policy during a Subscription Term, provided however, ClickHouse shall not materially diminish the level of Support Services during a Subscription Term. The effective date of each version of the Support Services Policy will be stated thereon, and ClickHouse agrees to archive copies of each version, and make the same available to Customer upon written request (e-mail sufficient). The parties agree that the Support Services Policy is hereby incorporated into these terms and conditions by this reference.

+ +

2. AGREEMENT SCOPE AND PERFORMANCE OF SUPPORT SERVICES

+ +

2.1 Agreement Scope. This Agreement includes terms and conditions applicable to Subscriptions for Support Services purchased under each Order Form entered into by the parties under Section 2.2 below, which Support Services may be used by Customer solely for Internal use and in connection with the use case(s) set forth on the applicable Order Form.

+ +

2.2 Order for Support Services Subscriptions. Orders for Subscriptions may be placed by Customer through (1) the execution of Order Forms with ClickHouse or (2) issuance by Customer of a Qualifying PO, which will be deemed to constitute, for the purposes of this Agreement, the execution by Customer of the referenced Order Form.

+ +

2.3 Affiliates. The parties agree that their respective Affiliates may also conduct business under this Agreement by entering into Order Forms, which in some cases may be subject to such additional and/or alternative terms and conditions to those contained in this Agreement as may be mutually agreed in the Order Form or an attachment thereto, as applicable. Accordingly, where Affiliates of the parties conduct business hereunder, references to Customer herein shall include any applicable Customer Affiliate, and references to ClickHouse herein shall include any applicable ClickHouse Affiliate. The parties agree that where either of them or one of their Affiliates enters into an Order Form with an Affiliate of the other party, that such Affiliate shall be solely responsible for performing all of its obligations under this Agreement in connection with such Order Form.

+ +

2.4 Performance of Support Services. Subject to Customer’s payment of all fees (as set forth in an applicable Order Form), ClickHouse will provide Customer with Support Services for the Software during an applicable Subscription Term in accordance with this Agreement and the Support Services Policy. Customer will reasonably cooperate with ClickHouse in connection with the Support Services, including, without limitation, by providing ClickHouse reasonable remote access to its installations, server cloud (or hosting provider), Software and equipment in connection therewith. Further, Customer will designate appropriately skilled personnel to serve as ClickHouse’s central contacts in connection with the use, operation and support of the Software. Customer understands that ClickHouse’s performance of Support Services is dependent in part on Customer’s cooperation, actions, and performance. ClickHouse shall not be responsible for any delays or interruptions in its performance of Support Services, or any claims arising therefrom, due to Customer’s lack of cooperation or acts or omissions. ClickHouse may use its Affiliates or subcontractors to provide Support Services to Customer, provided that ClickHouse remains responsible to Customer for performance.

+ +

3. PAYMENT AND TAXES

+ +

3.1 Payment. ClickHouse will invoice Customer for the fees due under each Order Form or otherwise under this Agreement, and Customer will pay such fees within thirty (30) days after receipt of an applicable invoice. All invoices will be paid in the currency set forth on the applicable Order Form. Payments will be made without right of set-off or chargeback. Except as otherwise expressly provided in this Agreement, any and all payments made by Customer pursuant to this Agreement or any Order Form are non-refundable, and all commitments to make any payments hereunder or under any Order Form are non-cancellable.

+ +

3.2 Taxes. All fees stated on an Order Form are exclusive of any applicable sales, use, value added and excise taxes levied upon the delivery or use of the taxable components, if any, of any Subscription purchased by Customer under this Agreement (collectively, “Taxes”). Taxes do not include any taxes on the net income of ClickHouse or any of its Affiliates. Unless Customer provides ClickHouse a valid state sales/use/excise tax exemption certificate or Direct Pay Permit, and provided that ClickHouse separately states any such taxes in the applicable invoice, Customer will pay and be solely responsible for all Taxes. If Customer is required by any foreign governmental authority to deduct or withhold any portion of the amount invoiced for the delivery or use of Support Services under this Agreement, Customer shall increase the sum paid to ClickHouse by an amount necessary for the total payment to ClickHouse equal to the amount originally invoiced.

+ +

4. CONFIDENTIAL INFORMATION

+ +

4.1 Confidential Information. Both parties acknowledge that, in the course of performing this Agreement, they may obtain information relating to products (such as goods, services, and software) of the other party, or relating to the parties themselves, which is of a confidential and proprietary nature ("Confidential Information"). Confidential Information includes materials and all communications concerning ClickHouse's or Customer's business and marketing strategies, including but not limited to employee and customer lists, customer profiles, project plans, design documents, product strategies and pricing data, research, advertising plans, leads and sources of supply, development activities, design and coding, interfaces with the Products, anything provided by either party to the other in connection with the Products and/or Support Services provided under this Agreement, including, without limitation, computer programs, technical drawings, algorithms, know-how, formulas, processes, ideas, inventions (whether patentable or not), schematics and other technical plans and other information of the parties which by its nature can be reasonably expected to be proprietary and confidential, whether it is presented in oral, printed, written, graphic or photographic or other tangible form (including information received, stored or transmitted electronically) even though specific designation as Confidential Information has not been made. Confidential Information also includes any notes, summaries, analyses of the foregoing that are prepared by the receiving party.

+ +

4.2 Non-use and Non-disclosure. The parties shall at all times, both during the Term and thereafter keep in trust and confidence all Confidential Information of the other party using commercially reasonable care (but in no event less than the same degree of care that the receiving party uses to protect its own Confidential Information) and shall not use such Confidential Information other than as necessary to carry out its duties under this Agreement, nor shall either party disclose any such Confidential Information to third parties other than to Affiliates or as necessary to carry out its duties under this Agreement without the other party's prior written consent, provided that each party shall be allowed to disclose Confidential Information of the other party to the extent that such disclosure is approved in writing by such other party, or necessary to enforce its rights under this Agreement.

+ +

4.3 Non-Applicability. The obligations of confidentiality shall not apply to information which (i) has entered the public domain or is otherwise publicly available, except where such entry or availability is the result of a party's breach of this Agreement; (ii) prior to disclosure hereunder was already in the receiving party's possession without restriction as evidenced by appropriate documentation; (iii) subsequent to disclosure hereunder is obtained by the receiving party on a non-confidential basis from a third party who has the right to disclose such information; or (iv) was developed by the receiving party without any use of any of the Confidential Information as evidenced by appropriate documentation.

+ +

4.4 Terms of this Agreement. Except as required by law or governmental regulation, neither party shall disclose, advertise, or publish the terms and conditions of this Agreement without the prior written consent of the other party, except that either party may disclose the terms of this Agreement to potential acquirers, referral partners involved in an applicable transaction, accountants, attorneys and Affiliates pursuant to the terms of a non-disclosure or confidentiality agreement. If Customer is using a third party provider to host a Product, then such provider may also receive, subject to a confidentiality obligation, information related to the terms of this Agreement or Customer’s usage of the applicable Product.

+ +

4.5 Disclosure Required by Law. Notwithstanding anything to the contrary herein, each party may disclose the other party's Confidential Information in order to comply with applicable law and/or an order from a court or other governmental body of competent jurisdiction, and, in connection with compliance with such an order only, if such party: (i) unless prohibited by law, gives the other party prior written notice to such disclosure if the time between that order and such disclosure reasonably permits or, if time does not permit, gives the other party written notice of such disclosure promptly after complying with that order and (ii) fully cooperates with the other party, at the other party's cost and expense, in seeking a protective order, or confidential treatment, or taking other measures to oppose or limit such disclosure. Each party must not release any more of the other party's Confidential Information than is, in the opinion of its counsel, reasonably necessary to comply with an applicable order.

+ +

5. WARRANTIES AND DISCLAIMER OF WARRANTIES

+ +

5.1 Limited Support Services Performance Warranty. ClickHouse warrants that it will perform the Support Services in a professional, workmanlike manner, consistent with generally accepted industry practice, and in accordance with the Support Services Policy. In the event of a breach of the foregoing warranty, ClickHouse’s sole obligation, and Customer’s exclusive remedy, shall be for ClickHouse to re-perform the applicable Support Services.

+ +

5.2 Warranty Disclaimer. EXCEPT AS SET FORTH IN SECTION 5.1 ABOVE, THE SUPPORT SERVICES ARE PROVIDED “AS IS” WITHOUT WARRANTY OF ANY KIND AND CLICKHOUSE MAKES NO ADDITIONAL WARRANTIES, WHETHER EXPRESSED, IMPLIED OR STATUTORY, REGARDING OR RELATING TO THE SUPPORT SERVICES OR ANY MATERIALS FURNISHED OR PROVIDED TO CUSTOMER UNDER THIS AGREEMENT. TO THE MAXIMUM EXTENT PERMITTED UNDER APPLICABLE LAW, CLICKHOUSE SPECIFICALLY DISCLAIMS ALL IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT WITH RESPECT TO THE SUPPORT SERVICES AND ANY MATERIALS FURNISHED OR PROVIDED TO CUSTOMER UNDER THIS AGREEMENT. CUSTOMER UNDERSTANDS AND AGREES THAT THE SUPPORT SERVICES AND ANY MATERIALS FURNISHED OR PROVIDED TO CUSTOMER UNDER THIS AGREEMENT ARE NOT DESIGNED OR INTENDED FOR USE IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT, WEAPONS SYSTEMS, OR LIFE SUPPORT SYSTEMS.

+ +

6. LIMITATION OF LIABILITY

+ +

6.1 Excluded Damages. IN NO EVENT SHALL CUSTOMER OR CLICKHOUSE, OR THEIR RESPECTIVE AFFILIATES, BE LIABLE FOR ANY LOSS OF PROFITS, LOSS OF USE, BUSINESS INTERRUPTION, LOSS OF DATA, COST OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY PUNITIVE, INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND IN CONNECTION WITH OR ARISING OUT OF THE PERFORMANCE OF OR FAILURE TO PERFORM THIS AGREEMENT, WHETHER ALLEGED AS A BREACH OF CONTRACT OR TORTIOUS CONDUCT, INCLUDING NEGLIGENCE, EVEN IF A PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

+ +

6.2 Damages Cap. EXCEPT WITH RESPECT TO (I) A PARTY’S BREACH OF ITS OBLIGATIONS UNDER SECTION 4, (II) AMOUNTS PAYABLE BY CUSTOMER UNDER SECTION 3 OF THIS AGREEMENT AND EACH ORDER FORM, AND (III) CUSTOMER'S VIOLATIONS OF THE USE RESTRICTIONS SET FORTH IN THIS AGREEMENT, IN NO EVENT SHALL CLICKHOUSE'S OR CUSTOMER’S TOTAL, CUMULATIVE LIABILITY UNDER ANY ORDER FORM EXCEED THE AMOUNT PAID OR PAYABLE BY CUSTOMER TO CLICKHOUSE UNDER THIS AGREEMENT FOR THE AFFECTED SUPPORT SERVICES DELIVERED AND/OR MADE AVAILABLE TO CUSTOMER UNDER SUCH ORDER FORM FOR THE TWELVE (12) MONTH PERIOD IMMEDIATELY PRIOR TO THE FIRST EVENT GIVING RISE TO LIABILITY.

+ +

6.3 Basis of the Bargain. THE ALLOCATIONS OF LIABILITY IN THIS SECTION 6 REPRESENT THE AGREED AND BARGAINED FOR UNDERSTANDING OF THE PARTIES, AND THE COMPENSATION OF CLICKHOUSE FOR THE SUPPORT SERVICES PROVIDED HEREUNDER REFLECTS SUCH ALLOCATIONS. THE FOREGOING LIMITATIONS, EXCLUSIONS AND DISCLAIMERS WILL APPLY TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, EVEN IF ANY REMEDY FAILS IN ITS ESSENTIAL PURPOSE.

+ +

7. TERM AND TERMINATION

+ +

7.1 Subscription Term. The initial Subscription Term for each Subscription will commence and expire in accordance with the start date and end date set forth on the applicable Order Form, unless earlier terminated in accordance with Section 7.3 below. Thereafter, each Subscription may be renewed for additional one (1) year periods upon the mutual written agreement of the parties. The initial Subscription Term, plus any subsequent renewal Subscription Term shall be the "Subscription Term".

+ +

7.2 Agreement Term. This Agreement will commence on the Effective Date and, unless earlier terminated in accordance with Section 7.3(b) below, continue in force and effect for a period of two (2) years. Thereafter, the term of this Agreement shall automatically renew for additional one (1) year periods unless either party gives written notice to the other of its intention not to renew the Agreement at least thirty (30) days prior to the expiration of the then-current term. The initial term of this Agreement, plus any subsequent renewal term, shall be the "Term" of this Agreement. Notwithstanding any expiration of this Agreement, its terms will continue to apply to any Subscription that has not been terminated or for which the Subscription Term has not expired.

+ +

7.3 Termination.

+
(a) Subscriptions. Each party may terminate a Subscription upon giving notice in writing to the other party if the non-terminating party commits a material breach of this Agreement with respect to such Subscription, and has failed to cure such breach within thirty (30) days following a request in writing from the notifying party to do so. Upon the termination or expiration of a Subscription, the rights and obligations of the parties with respect thereto will, subject to Section 7.4 below, cease, provided that termination of a Subscription under this subsection (a) will not result in termination of any other Subscriptions.

(b) Agreement. Either party may terminate this Agreement upon giving notice in writing to the other party if the non-terminating party commits a material breach of this Agreement with respect to any active Subscriptions hereunder, and has failed to cure such breach within thirty (30) days following a request in writing from the notifying party to do so. For the avoidance of doubt, termination of this Agreement under this subsection (b) will result in the termination of all Subscriptions and Order Forms.

7.4 Survival. Upon the expiration or termination of an Order Form or this Agreement, (i) Customer shall have no further rights under any affected Subscription(s); and (ii) any payment obligations accrued under Section 3, as well as the provisions of Sections 1, 4, 5, 6, 7, 7.4 and 9 of this Agreement will survive such expiration or termination.

+ +

8. GENERAL

+ +

8.1 Anti-Corruption. Each party acknowledges that it is aware of, understands and has complied and will comply with, all applicable U.S. and foreign anti-corruption laws, including without limitation, the U.S. Foreign Corrupt Practices Act of 1977 and the U.K. Bribery Act of 2010, and similarly applicable anti-corruption and anti-bribery laws ("Anti-Corruption Laws"). Each party agrees that no one acting on its behalf will give, offer, agree or promise to give, or authorize the giving directly or indirectly, of any money or other thing of value, including travel, entertainment, or gifts, to anyone as an unlawful inducement or reward for favorable action or forbearance from action or the exercise of unlawful influence (a) to any governmental official or employee (including employees of government-owned and government-controlled corporations or agencies or public international organizations), (b) to any political party, official of a political party, or candidate, (c) to an intermediary for payment to any of the foregoing, or (d) to any other person or entity in a corrupt or improper effort to obtain or retain business or any commercial advantage, such as receiving a permit or license, or directing business to any person. Improper payments, provisions, bribes, kickbacks, influence payments, or other unlawful provisions to any person are prohibited under this Agreement.

+ +

8.2 Assignment. Neither party may assign this Agreement, in whole or in part, without the prior written consent of the other party, provided that no such consent will be required to assign this Agreement in its entirety to (i) an Affiliate that is able to satisfy the obligations of the assigning party under this Agreement or (ii) a successor in interest in connection with a merger, acquisition or sale of all or substantially all of the assigning party's assets. Any assignment in violation of this Section shall be void, ab initio, and of no effect. Subject to the foregoing, this Agreement is binding upon, inures to the benefit of and is enforceable by, the parties and their respective permitted successors and assigns.

+ +

8.3 Attorneys' Fees. If any action or proceeding, whether regulatory, administrative, at law or in equity is commenced or instituted to enforce or interpret any of the terms or provisions of this Agreement, the prevailing party in any such action or proceeding shall be entitled to recover its reasonable attorneys' fees, expert witness fees, costs of suit and expenses, in addition to any other relief to which such prevailing party may be entitled. As used herein, "prevailing party" includes without limitation, a party who dismisses an action for recovery hereunder in exchange for payment of the sums allegedly due, performance of covenants allegedly breached, or consideration substantially equal to the relief sought in the action.

+ +

8.4 California Consumer Privacy Act (CCPA). ClickHouse is a “Service Provider” as such term is defined under §1798.140(v) of the CCPA. As such ClickHouse shall not retain, use or disclose any personal information (as defined in the CCPA) received from Customer during the Term of this Agreement for any purpose other than the specific purpose of providing the Support Services specified in this Agreement or for such other business purpose as is specified in this Agreement.

+ +

8.5 Customer Identification. ClickHouse may identify Customer as a user of the Support Services, on its website, through a press release issued by ClickHouse and in other promotional materials.

+ +

8.6 Feedback. Customer, Customer’s Affiliates, and their respective agents, may volunteer feedback to ClickHouse, and/or its Affiliates, about the Support Services (“Feedback”). ClickHouse and its Affiliates shall be irrevocably entitled to use that Feedback, for any purpose and without any duty to account, provided that, in doing so, they may not breach their obligations of confidentiality under Section 4 of this Agreement.

+ +

8.7 Force Majeure. Except with respect to payment obligations, neither party will be liable for, or be considered to be in breach of, or in default under, this Agreement, as a result of any cause or condition beyond such party's reasonable control.

+ +

8.8 Governing Law, Jurisdiction and Venue.

+ +
(a) Customers in California. If Customer is located in California (as determined by the Customer address on the applicable Order Form), this Agreement will be governed by the laws of the State of California, without regard to its conflict of laws principles, and all suits hereunder will be brought solely in Federal Court for the Northern District of California, or if that court lacks subject matter jurisdiction, in any California State Court located in Santa Clara County.

(b) Customers Outside of California. If Customer is located anywhere other than California (as determined by the Customer address on the applicable Order Form), this Agreement will be governed by the laws of the State of Delaware, without regard to its conflict of laws principles, and all suits hereunder will be brought solely in Federal Court for the District of Delaware, or if that court lacks subject matter jurisdiction, in any Delaware State Court located in Wilmington, Delaware.

(c) All Customers. This Agreement shall not be governed by the 1980 UN Convention on Contracts for the International Sale of Goods. The parties hereby irrevocably waive any and all claims and defenses either might otherwise have in any action or proceeding in any of the applicable courts set forth in (a) or (b) above, based upon any alleged lack of personal jurisdiction, improper venue, forum non conveniens, or any similar claim or defense.

(d) Equitable Relief. A breach or threatened breach, by either party of Section 4 may cause irreparable harm for which damages at law may not provide adequate relief, and therefore the non-breaching party shall be entitled to seek injunctive relief without being required to post a bond.

8.9 Non-waiver. Any failure of either party to insist upon or enforce performance by the other party of any of the provisions of this Agreement or to exercise any rights or remedies under this Agreement will not be interpreted or construed as a waiver or relinquishment of such party's right to assert or rely upon such provision, right or remedy in that or any other instance.

+ +

8.10 Notices. Any notice or other communication under this Agreement given by either party to the other will be deemed to be properly given if given in writing and delivered in person or by e-mail, if acknowledged received by return e-mail or followed within one day by a delivered or mailed copy of such notice, or if mailed, properly addressed and stamped with the required postage, to the intended recipient at its address specified on an Order Form. Notices to ClickHouse may also be sent to legal@ClickHouse.com. Either party may from time to time change its address for notices under this Section by giving the other party notice of the change in accordance with this Section.

+ +

8.11 Relationship of the Parties. The relationship of the parties hereunder shall be that of independent contractors, and nothing herein shall be deemed or construed to create any employment, agency or fiduciary relationship between the parties. Each party shall be solely responsible for the supervision, direction, control and payment of its personnel, including, without limitation, for taxes, deductions and withholdings, compensation and benefits, and nothing herein will be deemed to result in either party having an employer-employee relationship with the personnel of the other party.

+ +

8.12 Severability. If any provision of this Agreement is held to be invalid or unenforceable, the remaining portions will remain in full force and effect and such provision will be enforced to the maximum extent possible so as to give effect to the intent of the parties and will be reformed to the extent necessary to make such provision valid and enforceable.

+ +

8.13 Entire Agreement; Amendment. This Agreement, together with any Order Forms executed by the parties, and the Support Services Policy, each of which is hereby incorporated herein by this reference, constitutes the entire agreement between the parties concerning the subject matter hereof, and it supersedes, and its terms govern, all prior proposals, agreements, or other communications between the parties, oral or written, regarding such subject matter. This Agreement may be executed in any number of counterparts, each of which when so executed and delivered shall be deemed an original, and all of which together shall constitute one and the same agreement. Execution of a scanned copy will have the same force and effect as execution of an original, and a scanned signature will be deemed an original and valid signature. In the event of any conflict between the terms and conditions of any of the foregoing documents, the conflict shall be resolved based on the following order of precedence: (i) an applicable Order Form (but only for the transaction thereunder), (ii) an applicable Addendum (including any exhibits, attachments and addenda thereto), (iii) this Agreement, and (iv) the Support Services Policy. For the avoidance of doubt, the parties hereby expressly acknowledge and agree that if Customer issues any purchase orders or similar documents in connection with its purchase of a Subscription, it shall do so only for the purpose of Section 2.2(2) or for its own internal, administrative purposes and not with the intent to provide any contractual terms. By entering into this Agreement, whether prior to or following receipt of Customer's purchase order or any similar document, the parties are hereby expressly showing their intention not to be contractually bound by the contents of any such purchase order or similar document, which are hereby deemed rejected and extraneous to this Agreement, and ClickHouse's performance of this Agreement shall not amount to: (i) an acceptance by conduct of any terms set out or referred to in the purchase order or similar document; (ii) an amendment of this Agreement, nor (iii) an agreement to amend this Agreement. This Agreement shall not be modified except by a subsequently dated, written amendment that expressly amends this Agreement and which is signed on behalf of ClickHouse and Customer by their duly authorized representatives. The parties agree that the terms and conditions of this Agreement are a result of mutual negotiations. Therefore, the rule of construction that any ambiguity shall apply against the drafter is not applicable and will not apply to this Agreement. Any ambiguity shall be reasonably construed as to its fair meaning and not strictly for or against one party regardless of who authored the ambiguous language.

+ + +
+
\ No newline at end of file diff --git a/website/templates/support/agreement-hero.html b/website/templates/support/agreement-hero.html new file mode 100644 index 00000000000..ea97fb7729a --- /dev/null +++ b/website/templates/support/agreement-hero.html @@ -0,0 +1,10 @@ +
+
+
+ +

+ {{ _('ClickHouse, Inc.
Subscription Agreement') }} +

+ +
+
\ No newline at end of file