diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index 57af207127d..db837ac1ec7 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -14,6 +14,7 @@ jobs: with: test_name: Jepsen keeper check runner_type: style-checker + report_required: true run_command: | python3 jepsen_check.py keeper # ServerJepsenRelease: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index d2865eb737d..6d150f37a27 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -15,6 +15,8 @@ jobs: outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: + - name: DebugInfo + uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a - name: Check out repository code uses: ClickHouse/checkout@v1 with: @@ -33,11 +35,9 @@ jobs: - name: PrepareRunConfig id: runconfig run: | - echo "::group::configure CI run" python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --rebuild-all-binaries --outfile ${{ runner.temp }}/ci_run_data.json - echo "::endgroup::" - echo "::group::CI run configure results" + echo "::group::CI configuration" python3 -m json.tool ${{ runner.temp }}/ci_run_data.json echo "::endgroup::" @@ -255,9 +255,9 @@ jobs: run_command: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head \ - --image-repo clickhouse/clickhouse-server --image-path docker/server + --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse python3 docker_server.py --release-type head \ - --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 52d44b32036..b3ac2135e50 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -22,6 +22,8 @@ jobs: outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: + - name: DebugInfo + uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a - name: Check out repository code uses: ClickHouse/checkout@v1 with: @@ -44,11 +46,9 @@ jobs: - name: PrepareRunConfig id: runconfig run: | - echo "::group::configure CI run" python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json - echo "::endgroup::" - echo "::group::CI run configure results" + echo "::group::CI configuration" python3 -m json.tool ${{ runner.temp }}/ci_run_data.json echo "::endgroup::" @@ -67,6 +67,7 @@ jobs: DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n') export DOCKER_TAG=$DOCKER_TAG python3 ./tests/ci/style_check.py --no-push + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check' BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() }} @@ -796,7 +797,7 @@ jobs: test_name: Unit tests (asan) runner_type: fuzzer-unit-tester data: ${{ needs.RunConfig.outputs.data }} - UnitTestsReleaseClang: + UnitTestsRelease: needs: [RunConfig, BuilderBinRelease] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml @@ -923,7 +924,7 @@ jobs: - UnitTestsTsan - UnitTestsMsan - 
UnitTestsUBsan - - UnitTestsReleaseClang + - UnitTestsRelease - CompatibilityCheckX86 - CompatibilityCheckAarch64 - SQLancerTestRelease diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index a0dd0a0fa9e..2371579692f 100644 --- a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -73,12 +73,15 @@ jobs: - name: Pre run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --pre --job-name '${{inputs.build_name}}' - - name: Build + - name: Run run: | - python3 "$GITHUB_WORKSPACE/tests/ci/build_check.py" "$BUILD_NAME" + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" \ + --infile ${{ toJson(inputs.data) }} \ + --job-name "$BUILD_NAME" \ + --run - name: Post # it still be build report to upload for failed build job - if: always() + if: ${{ !cancelled() }} run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.build_name}}' - name: Mark as done diff --git a/.github/workflows/reusable_simple_job.yml b/.github/workflows/reusable_simple_job.yml index 91022a171ed..7b7084420a4 100644 --- a/.github/workflows/reusable_simple_job.yml +++ b/.github/workflows/reusable_simple_job.yml @@ -34,12 +34,16 @@ name: Simple job working-directory: description: sets custom working directory type: string - default: "" + default: "$GITHUB_WORKSPACE/tests/ci" git_ref: description: commit to use, merge commit for pr or head required: false type: string default: ${{ github.event.after }} # no merge commit + report_required: + description: set to true if job report with the commit status required + type: boolean + default: false secrets: secret_envs: description: if given, it's passed to the environments @@ -81,12 +85,12 @@ jobs: job_type: test - name: Run run: | - if [ -n '${{ inputs.working-directory }}' ]; then - cd "${{ inputs.working-directory }}" - else - cd "$GITHUB_WORKSPACE/tests/ci" - fi + cd "${{ inputs.working-directory }}" ${{ inputs.run_command }} + - name: Post + if: ${{ inputs.report_required }} + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --post --job-name '${{inputs.test_name}}' - name: Clean if: always() uses: ./.github/actions/clean diff --git a/.github/workflows/reusable_test.yml b/.github/workflows/reusable_test.yml index 09177ad887a..749f64d434e 100644 --- a/.github/workflows/reusable_test.yml +++ b/.github/workflows/reusable_test.yml @@ -38,7 +38,7 @@ name: Testing workflow working-directory: description: sets custom working directory type: string - default: "" + default: "$GITHUB_WORKSPACE/tests/ci" secrets: secret_envs: description: if given, it's passed to the environments @@ -96,19 +96,14 @@ jobs: python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --pre --job-name '${{inputs.test_name}}' - name: Run run: | - if [ -n "${{ inputs.working-directory }}" ]; then - cd "${{ inputs.working-directory }}" - else - cd "$GITHUB_WORKSPACE/tests/ci" - fi - if [ -n "$(echo '${{ inputs.run_command }}' | tr -d '\n')" ]; then - echo "Running command from workflow input" - ${{ inputs.run_command }} - else - echo "Running command from job config" - python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --run --job-name '${{inputs.test_name}}' - fi + cd "${{ inputs.working-directory }}" + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" \ + --infile ${{ toJson(inputs.data) }} \ + --job-name '${{inputs.test_name}}' \ + --run \ + --run-command '''${{inputs.run_command}}''' - name: Post run + if: ${{ 
!cancelled() }} run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.test_name}}' - name: Mark as done diff --git a/.gitmessage b/.gitmessage index f4a25a837bc..098b66aab1c 100644 --- a/.gitmessage +++ b/.gitmessage @@ -1,9 +1,18 @@ -## To avoid merge commit in CI run (add a leading space to apply): -#no-merge-commit +### CI modificators (add a leading space to apply): -## Running specified job (add a leading space to apply): +## To avoid a merge commit in CI: +#no_merge_commit + +## To discard CI cache: +#no_ci_cache + +## To run specified set of tests in CI: +#ci_set_ +#ci_set_reduced + +## To run specified job in CI: #job_ #job_stateless_tests_release #job_package_debug diff --git a/base/poco/Foundation/include/Poco/BufferedStreamBuf.h b/base/poco/Foundation/include/Poco/BufferedStreamBuf.h index 9f4cbd4e4d8..d97e37eedf3 100644 --- a/base/poco/Foundation/include/Poco/BufferedStreamBuf.h +++ b/base/poco/Foundation/include/Poco/BufferedStreamBuf.h @@ -26,6 +26,11 @@ #include "Poco/StreamUtil.h" +namespace DB +{ +class ReadBufferFromIStream; +} + namespace Poco { @@ -120,6 +125,8 @@ protected: openmode getMode() const { return _mode; } private: + friend class DB::ReadBufferFromIStream; + virtual int readFromDevice(char_type * /*buffer*/, std::streamsize /*length*/) { return 0; } virtual int writeToDevice(const char_type * /*buffer*/, std::streamsize /*length*/) { return 0; } diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index b9c7ea34a36..78f18f376f4 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -49,17 +49,10 @@ CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}" -for dir in "$DATA_DIR" \ - "$ERROR_LOG_DIR" \ - "$LOG_DIR" \ - "$TMP_DIR" \ - "$USER_PATH" \ - "$FORMAT_SCHEMA_PATH" \ - "${DISKS_PATHS[@]}" \ - "${DISKS_METADATA_PATHS[@]}" -do +function create_directory_and_do_chown() { + local dir=$1 # check if variable not empty - [ -z "$dir" ] && continue + [ -z "$dir" ] && return # ensure directories exist if [ "$DO_CHOWN" = "1" ]; then mkdir="mkdir" @@ -81,6 +74,23 @@ do chown -R "$USER:$GROUP" "$dir" fi fi +} + +create_directory_and_do_chown "$DATA_DIR" + +# Change working directory to $DATA_DIR in case there're paths relative to $DATA_DIR, also avoids running +# clickhouse-server at root directory. 
+cd "$DATA_DIR" + +for dir in "$ERROR_LOG_DIR" \ + "$LOG_DIR" \ + "$TMP_DIR" \ + "$USER_PATH" \ + "$FORMAT_SCHEMA_PATH" \ + "${DISKS_PATHS[@]}" \ + "${DISKS_METADATA_PATHS[@]}" +do + create_directory_and_do_chown "$dir" done # if clickhouse user is defined - create it (user "default" already exists out of box) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index bd13791d06a..08ee52e4f1b 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -99,6 +99,16 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] > /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml + sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \ + | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" \ + > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp + mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml + + sudo cat /etc/clickhouse-server2/config.d/filesystem_caches_path.xml \ + | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_2/|" \ + > /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp + mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml + mkdir -p /var/run/clickhouse-server1 sudo chown clickhouse:clickhouse /var/run/clickhouse-server1 sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 861fd51a3fa..9c008209316 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -56,6 +56,9 @@ echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/sys # Install previous release packages install_packages previous_release_package_folder +# Save old settings from system table for settings changes check +clickhouse-local -q "select * from system.settings format Native" > old_settings.native + # Initial run without S3 to create system.*_log on local file system to make it # available for dump via clickhouse-local configure @@ -152,6 +155,63 @@ install_packages package_folder export ZOOKEEPER_FAULT_INJECTION=1 configure +# Check that all new/changed setting were added in settings changes history. +# Some settings can be different for builds with sanitizers, so we check +# settings changes only for non-sanitizer builds. 
+IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'") +if [ "${IS_SANITIZED}" -eq "0" ] +then + clickhouse-local -q "select * from system.settings format Native" > new_settings.native + clickhouse-local -nmq " + CREATE TABLE old_settings AS file('old_settings.native'); + CREATE TABLE new_settings AS file('new_settings.native'); + + SELECT + name, + new_settings.value AS new_value, + old_settings.value AS old_value + FROM new_settings + LEFT JOIN old_settings ON new_settings.name = old_settings.name + WHERE (new_settings.value != old_settings.value) AND (name NOT IN ( + SELECT arrayJoin(tupleElement(changes, 'name')) + FROM system.settings_changes + WHERE version = extract(version(), '^(?:\\d+\\.\\d+)') + )) + SETTINGS join_use_nulls = 1 + INTO OUTFILE 'changed_settings.txt' + FORMAT PrettyCompactNoEscapes; + + SELECT name + FROM new_settings + WHERE (name NOT IN ( + SELECT name + FROM old_settings + )) AND (name NOT IN ( + SELECT arrayJoin(tupleElement(changes, 'name')) + FROM system.settings_changes + WHERE version = extract(version(), '^(?:\\d+\\.\\d+)') + )) + INTO OUTFILE 'new_settings.txt' + FORMAT PrettyCompactNoEscapes; + " + + if [ -s changed_settings.txt ] + then + mv changed_settings.txt /test_output/ + echo -e "Changed settings are not reflected in settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv + else + echo -e "There are no changed settings or they are reflected in settings changes history$OK" >> /test_output/test_results.tsv + fi + + if [ -s new_settings.txt ] + then + mv new_settings.txt /test_output/ + echo -e "New settings are not reflected in settings changes history (see new_settings.txt)$FAIL$(head_escaped /test_output/new_settings.txt)" >> /test_output/test_results.tsv + else + echo -e "There are no new settings or they are reflected in settings changes history$OK" >> /test_output/test_results.tsv + fi +fi + # Just in case previous version left some garbage in zk sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \ | sed "s|>1<|>0<|g" \ @@ -257,6 +317,8 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d (test like '%Fatal message%') DESC, (test like '%Error message%') DESC, (test like '%previous release%') DESC, +(test like '%Changed settings%') DESC, +(test like '%New settings%') DESC, rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index e50ed8caedd..87d59a9394b 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -16,7 +16,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], ... 
-) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']) +) ENGINE = MySQL({host:port, database, table, user, password[, replace_query, on_duplicate_clause] | named_collection[, option=value [,..]]}) SETTINGS [ connection_pool_size=16, ] [ connection_max_tries=3, ] @@ -42,23 +42,17 @@ The MySQL Table Engine is currently not available on the ClickHouse builds for M **Engine Parameters** - `host:port` — MySQL server address. - - `database` — Remote database name. - - `table` — Remote table name. - - `user` — MySQL user. - - `password` — User password. - - `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is substituted. - - `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. - Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the [MySQL documentation](https://dev.mysql.com/doc/refman/8.0/en/insert-on-duplicate.html) to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause. - To specify `on_duplicate_clause` you need to pass `0` to the `replace_query` parameter. If you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception. +Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment. + Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are executed on the MySQL server. The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes. @@ -71,7 +65,7 @@ CREATE TABLE test_replicas (id UInt32, name String, age UInt32, money UInt32) EN ## Usage Example {#usage-example} -Table in MySQL: +Create table in MySQL: ``` text mysql> CREATE TABLE `test`.`test` ( @@ -94,7 +88,7 @@ mysql> select * from test; 1 row in set (0,00 sec) ``` -Table in ClickHouse, retrieving data from the MySQL table created above: +Create table in ClickHouse using plain arguments: ``` sql CREATE TABLE mysql_table @@ -105,6 +99,25 @@ CREATE TABLE mysql_table ENGINE = MySQL('localhost:3306', 'test', 'test', 'bayonet', '123') ``` +Or using [named collections](/docs/en/operations/named-collections.md): + +```sql +CREATE NAMED COLLECTION creds AS + host = 'localhost', + port = 3306, + database = 'test', + user = 'bayonet', + password = '123'; +CREATE TABLE mysql_table +( + `float_nullable` Nullable(Float32), + `int_id` Int32 +) +ENGINE = MySQL(creds, table='test') +``` + +Retrieving data from MySQL table: + ``` sql SELECT * FROM mysql_table ``` diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index aa3dc855537..131df1a435b 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -16,7 +16,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] name1 type1 [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], name2 type2 [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], ... 
-) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]); +) ENGINE = PostgreSQL({host:port, database, table, user, password[, schema, [, on_conflict]] | named_collection[, option=value [,..]]}) ``` See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query. @@ -35,31 +35,25 @@ The table structure can differ from the original PostgreSQL table structure: - `user` — PostgreSQL user. - `password` — User password. - `schema` — Non-default table schema. Optional. -- `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient. +- `on_conflict` — Conflict resolution strategy. Example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient. -or via config (since version 21.11): +[Named collections](/docs/en/operations/named-collections.md) (available since version 21.11) are recommended for production environment. Here is an example: ``` - - - - - -
-
- - - - - - + + localhost + 5432 + postgres + **** + schema1 +
``` Some parameters can be overridden by key value arguments: ``` sql -SELECT * FROM postgresql(postgres1, schema='schema1', table='table1'); +SELECT * FROM postgresql(postgres_creds, table='table1'); ``` ## Implementation Details {#implementation-details} diff --git a/docs/en/engines/table-engines/integrations/redis.md b/docs/en/engines/table-engines/integrations/redis.md index 8086a6503b8..3a07d150835 100644 --- a/docs/en/engines/table-engines/integrations/redis.md +++ b/docs/en/engines/table-engines/integrations/redis.md @@ -16,30 +16,32 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name name1 [type1], name2 [type2], ... -) ENGINE = Redis(host:port[, db_index[, password[, pool_size]]]) PRIMARY KEY(primary_key_name); +) ENGINE = Redis({host:port[, db_index[, password[, pool_size]]] | named_collection[, option=value [,..]] }) +PRIMARY KEY(primary_key_name); ``` **Engine Parameters** - `host:port` — Redis server address, you can ignore port and default Redis port 6379 will be used. - - `db_index` — Redis db index range from 0 to 15, default is 0. - - `password` — User password, default is blank string. - - `pool_size` — Redis max connection pool size, default is 16. - - `primary_key_name` - any column name in the column list. -- `primary` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a Redis key. +:::note Serialization +`PRIMARY KEY` supports only one column. The primary key will be serialized in binary as a Redis key. +Columns other than the primary key will be serialized in binary as Redis value in corresponding order. +::: -- columns other than the primary key will be serialized in binary as Redis value in corresponding order. +Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment. At this moment, all parameters passed using named collections to redis are required. -- queries with key equals or in filtering will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation. +:::note Filtering +Queries with `key equals` or `in filtering` will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation. +::: ## Usage Example {#usage-example} -Create a table in ClickHouse which allows to read data from Redis: +Create a table in ClickHouse using `Redis` engine with plain arguments: ``` sql CREATE TABLE redis_table @@ -52,6 +54,31 @@ CREATE TABLE redis_table ENGINE = Redis('redis1:6379') PRIMARY KEY(key); ``` +Or using [named collections](/docs/en/operations/named-collections.md): + +``` + + + localhost + 6379 + **** + 16 + s0 + + +``` + +```sql +CREATE TABLE redis_table +( + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32 +) +ENGINE = Redis(redis_creds) PRIMARY KEY(key); +``` + Insert: ```sql diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 06c05929ffa..c9d94dd95ee 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -5,9 +5,9 @@ sidebar_label: "Named collections" title: "Named collections" --- -Named collections provide a way to store collections of key-value pairs to be +Named collections provide a way to store collections of key-value pairs to be used to configure integrations with external sources. 
You can use named collections with -dictionaries, tables, table functions, and object storage. +dictionaries, tables, table functions, and object storage. Named collections can be configured with DDL or in configuration files and are applied when ClickHouse starts. They simplify the creation of objects and the hiding of credentials @@ -64,7 +64,7 @@ To manage named collections with DDL a user must have the `named_control_collect ``` :::tip -In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user. +In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user. ::: ## Storing named collections in configuration files @@ -296,7 +296,6 @@ host = '127.0.0.1', port = 5432, database = 'test', schema = 'test_schema', -connection_pool_size = 8 ``` Example of configuration: @@ -310,7 +309,6 @@ Example of configuration: 5432 test test_schema - 8 @@ -445,4 +443,3 @@ SELECT dictGet('dict', 'b', 1); │ a │ └─────────────────────────┘ ``` - diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 1cb7ec9dced..9a80f977ed1 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -172,7 +172,7 @@ If you set `timeout_before_checking_execution_speed `to 0, ClickHouse will use c ## timeout_overflow_mode {#timeout-overflow-mode} -What to do if the query is run longer than `max_execution_time`: `throw` or `break`. By default, `throw`. +What to do if the query is run longer than `max_execution_time` or the estimated running time is longer than `max_estimated_execution_time`: `throw` or `break`. By default, `throw`. # max_execution_time_leaf @@ -214,6 +214,10 @@ A maximum number of execution bytes per second. Checked on every data block when Checks that execution speed is not too slow (no less than ‘min_execution_speed’), after the specified time in seconds has expired. +## max_estimated_execution_time {#max_estimated_execution_time} + +Maximum query estimate execution time in seconds. Checked on every data block when ‘timeout_before_checking_execution_speed’ expires. + ## max_columns_to_read {#max-columns-to-read} A maximum number of columns that can be read from a table in a single query. If a query requires reading a greater number of columns, it throws an exception. diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md index dd8f6328688..d63517291a4 100644 --- a/docs/en/operations/system-tables/replication_queue.md +++ b/docs/en/operations/system-tables/replication_queue.md @@ -49,7 +49,7 @@ Columns: - `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted. -- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks. +- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times the action was postponed. 
- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed. diff --git a/docs/en/sql-reference/table-functions/fuzzJSON.md b/docs/en/sql-reference/table-functions/fuzzJSON.md index a64f35691f6..ab7bd7f9f1b 100644 --- a/docs/en/sql-reference/table-functions/fuzzJSON.md +++ b/docs/en/sql-reference/table-functions/fuzzJSON.md @@ -9,7 +9,7 @@ sidebar_label: fuzzJSON Perturbs a JSON string with random variations. ``` sql -fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] }) +fuzzJSON({ named_collection [, option=value [,..]] | json_str[, random_seed] }) ``` **Arguments** diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 5ffc20189da..80077ecdb33 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -16,7 +16,8 @@ If you have multiple replicas in your cluster, you can use the [s3Cluster functi **Syntax** ``` sql -gcs(path [,hmac_key, hmac_secret] [,format] [,structure] [,compression]) +gcs(url [, NOSIGN | hmac_key, hmac_secret] [,format] [,structure] [,compression_method]) +gcs(named_collection[, option=value [,..]]) ``` :::tip GCS @@ -24,10 +25,9 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML ::: -**Arguments** - -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +**Parameters** +- `url` — Bucket path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: ``` @@ -35,10 +35,21 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML ``` and not ~~https://storage.cloud.google.com~~. ::: +- `NOSIGN` — If this keyword is provided in place of credentials, all the requests will not be signed. +- `hmac_key` and `hmac_secret` — Keys that specify credentials to use with given endpoint. Optional. +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension. + +Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported: + + - `access_key_id` — `hmac_key`, optional. + - `secret_access_key` — `hmac_secret`, optional. + - `filename` — appended to the url if specified. + - `use_environment_credentials` — enabled by default, allows passing extra parameters using environment variables `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI`, `AWS_CONTAINER_CREDENTIALS_FULL_URI`, `AWS_CONTAINER_AUTHORIZATION_TOKEN`, `AWS_EC2_METADATA_DISABLED`. + - `no_sign_request` — disabled by default. + - `expiration_window_seconds` — default value is 120. -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. 
-- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. **Returned value** @@ -61,7 +72,7 @@ LIMIT 2; └─────────┴─────────┴─────────┘ ``` -The similar but from file with `gzip` compression: +The similar but from file with `gzip` compression method: ``` sql SELECT * @@ -158,6 +169,16 @@ The below get data from all `test-data.csv.gz` files from any folder inside `my- SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); ``` +For production use cases it is recommended to use [named collections](/docs/en/operations/named-collections.md). Here is the example: +``` sql + +CREATE NAMED COLLECTION creds AS + access_key_id = '***', + secret_access_key = '***'; +SELECT count(*) +FROM gcs(creds, url='https://s3-object-url.csv') +``` + ## Partitioned Write If you specify `PARTITION BY` expression when inserting data into `GCS` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index 0e5b0f54d1c..5fd9708317c 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -11,31 +11,25 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a **Syntax** ``` sql -mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']) +mysql({host:port, database, table, user, password[, replace_query, on_duplicate_clause] | named_collection[, option=value [,..]]}) ``` -**Arguments** +**Parameters** - `host:port` — MySQL server address. - - `database` — Remote database name. - - `table` — Remote table name. - - `user` — MySQL user. - - `password` — User password. - - `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. Possible values: - `0` - The query is executed as `INSERT INTO`. - `1` - The query is executed as `REPLACE INTO`. - - `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Can be specified only with `replace_query = 0` (if you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception). - Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1;` - `on_duplicate_clause` here is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause. +Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment. + Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are currently executed on the MySQL server. The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes. 
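The pushdown behavior just described can be made concrete with a small, hedged sketch. The address, credentials and the `test` table are the placeholders used in the examples below; which predicates MySQL actually receives depends on the query, so treat the comments as an illustration rather than a guarantee.

```sql
-- Sketch only: 'localhost:3306' and the credentials are placeholders from the examples below.
SELECT int_id, `float`
FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123')
WHERE int_id >= 1          -- simple comparison: can be executed by MySQL itself
  AND `float` * 2 > 3      -- non-trivial condition: evaluated in ClickHouse
LIMIT 5;                   -- applied in ClickHouse after the MySQL query finishes
```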
@@ -86,6 +80,18 @@ Selecting data from ClickHouse: SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); ``` +Or using [named collections](/docs/en/operations/named-collections.md): + +```sql +CREATE NAMED COLLECTION creds AS + host = 'localhost', + port = 3306, + database = 'test', + user = 'bayonet', + password = '123'; +SELECT * FROM mysql(creds, table='test'); +``` + ``` text ┌─int_id─┬─float─┐ │ 1 │ 2 │ diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index b9211d70cdb..3fd0e5805e7 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ -11,10 +11,10 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a **Syntax** ``` sql -postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) +postgresql({host:port, database, table, user, password[, schema, [, on_conflict]] | named_collection[, option=value [,..]]}) ``` -**Arguments** +**Parameters** - `host:port` — PostgreSQL server address. - `database` — Remote database name. @@ -22,6 +22,9 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) - `user` — PostgreSQL user. - `password` — User password. - `schema` — Non-default table schema. Optional. +- `on_conflict` — Conflict resolution strategy. Example: `ON CONFLICT DO NOTHING`. Optional. + +Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment. **Returned Value** @@ -86,12 +89,24 @@ postgresql> SELECT * FROM test; (1 row) ``` -Selecting data from ClickHouse: +Selecting data from ClickHouse using plain arguments: ```sql SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'password') WHERE str IN ('test'); ``` +Or using [named collections](/docs/en/operations/named-collections.md): + +```sql +CREATE NAMED COLLECTION mypg AS + host = 'localhost', + port = 5432, + database = 'test', + user = 'postgresql_user', + password = 'password'; +SELECT * FROM postgresql(mypg, table='test') WHERE str IN ('test'); +``` + ``` text ┌─int_id─┬─int_nullable─┬─float─┬─str──┬─float_nullable─┐ │ 1 │ ᴺᵁᴸᴸ │ 2 │ test │ ᴺᵁᴸᴸ │ diff --git a/docs/en/sql-reference/table-functions/redis.md b/docs/en/sql-reference/table-functions/redis.md index 98d9a647cee..09841642210 100644 --- a/docs/en/sql-reference/table-functions/redis.md +++ b/docs/en/sql-reference/table-functions/redis.md @@ -34,6 +34,7 @@ redis(host:port, key, structure[, db_index[, password[, pool_size]]]) - queries with key equals or in filtering will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation. +[Named collections](/docs/en/operations/named-collections.md) are not supported for `redis` table function at the moment. 
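To make the note about key filtering concrete, here is a hedged sketch; the address and column layout are the placeholders used in the examples below, and the exact optimization applied is up to the server. A predicate on the key column can be answered with a multi-key lookup, while a predicate only on value columns forces a scan of the whole keyspace.

```sql
-- Sketch only: 'redis1:6379' and the structure are placeholders.
-- Filtering on the key column can be turned into a multi-key lookup:
SELECT *
FROM redis('redis1:6379', 'key', 'key String, v1 String, v2 UInt32')
WHERE key IN ('1', '2');

-- Filtering only on a value column cannot use the key, so the whole table is scanned:
SELECT *
FROM redis('redis1:6379', 'key', 'key String, v1 String, v2 UInt32')
WHERE v2 > 10;
```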
**Returned Value** @@ -41,17 +42,7 @@ A table object with key as Redis key, other columns packaged together as Redis v ## Usage Example {#usage-example} -Create a table in ClickHouse which allows to read data from Redis: - -``` sql -CREATE TABLE redis_table -( - `k` String, - `m` String, - `n` UInt32 -) -ENGINE = Redis('redis1:6379') PRIMARY KEY(k); -``` +Read from Redis: ```sql SELECT * FROM redis( @@ -61,6 +52,15 @@ SELECT * FROM redis( ) ``` +Insert into Redis: + +```sql +INSERT INTO TABLE FUNCTION redis( + 'redis1:6379', + 'key', + 'key String, v1 String, v2 UInt32') values ('1', '1', 1); +``` + **See Also** - [The `Redis` table engine](/docs/en/engines/table-engines/integrations/redis.md) diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index 228f4a4c7e1..f6e49099d99 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -13,10 +13,12 @@ Both functions can be used in `SELECT` and `INSERT` queries. ## Syntax ``` sql -remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) -remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) +remote(addresses_expr, [db, table, user [, password], sharding_key]) +remote(addresses_expr, [db.table, user [, password], sharding_key]) +remote(named_collection[, option=value [,..]]) +remoteSecure(addresses_expr, [db, table, user [, password], sharding_key]) +remoteSecure(addresses_expr, [db.table, user [, password], sharding_key]) +remoteSecure(named_collection[, option=value [,..]]) ``` ## Parameters @@ -39,6 +41,8 @@ remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) - `password` — User password. If not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md). - `sharding_key` — Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). + ## Returned value A table located on a remote server. 
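Before the usage examples, a hedged sketch of `addresses_expr` with more than one address; both endpoints and `db.remote_table` are placeholders. Comma-separated addresses are treated as separate shards, so the query below is sent to both servers and the partial results are combined on the initiator.

```sql
-- Sketch only: the addresses and db.remote_table are placeholders.
-- Two comma-separated addresses behave like two shards: each server runs the
-- query on its own data and the initiator merges the partial counts.
SELECT count(*)
FROM remote('127.0.0.1:9000,127.0.0.2:9000', db.remote_table);
```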
@@ -82,7 +86,16 @@ example01-01-1,example01-02-1 SELECT * FROM remote('127.0.0.1', db.remote_engine_table) LIMIT 3; ``` -### Inserting data from a remote server into a table: +Or using [named collections](/docs/en/operations/named-collections.md): + +```sql +CREATE NAMED COLLECTION creds AS + host = '127.0.0.1', + database = 'db'; +SELECT * FROM remote(creds, table='remote_engine_table') LIMIT 3; +``` + +### Inserting data into a table on a remote server: ``` sql CREATE TABLE remote_table (name String, value UInt32) ENGINE=Memory; diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 8065f066666..970b3e52882 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -16,33 +16,41 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer **Syntax** ``` sql -s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key [,session_token]] [,format] [,structure] [,compression]) +s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method]) +s3(named_collection[, option=value [,..]]) ``` :::tip GCS The S3 Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs]( https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC. -For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_id` and `aws_secret_access_key`. +For GCS, substitute your HMAC key and HMAC secret where you see `access_key_id` and `secret_access_key`. ::: -**Arguments** +**Parameters** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +`s3` table function supports the following plain parameters: +- `url` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). :::note GCS - The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: + The GCS url is in this format as the endpoint for the Google XML API is different than the JSON API: ``` https://storage.googleapis.com/// ``` and not ~~https://storage.cloud.google.com~~. ::: - -- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. -- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `NOSIGN` — If this keyword is provided in place of credentials, all the requests will not be signed. +- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. 
+- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension. + +Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported: + + - `filename` — appended to the url if specified. + - `use_environment_credentials` — enabled by default, allows passing extra parameters using environment variables `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI`, `AWS_CONTAINER_CREDENTIALS_FULL_URI`, `AWS_CONTAINER_AUTHORIZATION_TOKEN`, `AWS_EC2_METADATA_DISABLED`. + - `no_sign_request` — disabled by default. + - `expiration_window_seconds` — default value is 120. **Returned value** @@ -82,7 +90,7 @@ FROM s3( LIMIT 5; ``` -ClickHouse also can determine the compression of the file. For example, if the file was zipped up with a `.csv.gz` extension, ClickHouse would decompress the file automatically. +ClickHouse also can determine the compression method of the file. For example, if the file was zipped up with a `.csv.gz` extension, ClickHouse would decompress the file automatically. ::: @@ -168,7 +176,7 @@ The below get data from all `test-data.csv.gz` files from any folder inside `my- SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); ``` -Note. It is possible to specify custom URL mappers in the server configuration file. Example: +Note. It is possible to specify custom URL mappers in the server configuration file. Example: ``` sql SELECT * FROM s3('s3://clickhouse-public-datasets/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); ``` @@ -190,6 +198,16 @@ Custom mapper can be added into `config.xml`: ``` +For production use cases it is recommended to use [named collections](/docs/en/operations/named-collections.md). Here is the example: +``` sql + +CREATE NAMED COLLECTION creds AS + access_key_id = '***', + secret_access_key = '***'; +SELECT count(*) +FROM s3(creds, url='https://s3-object-url.csv') +``` + ## Partitioned Write If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 080c9860519..92d9527df82 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -4,23 +4,34 @@ sidebar_position: 181 sidebar_label: s3Cluster title: "s3Cluster Table Function" --- +This is an extension to the [s3](/docs/en/sql-reference/table-functions/s3.md) table function. Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) and Google Cloud Storage [Google Cloud Storage](https://cloud.google.com/storage/) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. 
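As a quick illustration of that work distribution (the exact syntax is given below), here is a hedged sketch; the cluster name comes from the examples on this page, while the bucket URL and the structure are placeholders. The initiator expands the `*` glob into a list of files and hands the files out to the worker nodes, which return their partial counts to be merged.

```sql
-- Sketch only: the bucket URL and the structure are placeholders;
-- 'cluster_simple' is the cluster used in the examples on this page.
-- The initiator expands the glob and dispatches one file at a time to the workers.
SELECT count(*)
FROM s3Cluster(
    'cluster_simple',
    'https://s3.example.com/my-bucket/data/*.csv.gz',
    'CSV',
    'name String, value UInt32'
);
```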
**Syntax** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key, [session_token]] [,format] [,structure]) +s3Cluster(cluster_name, url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method]) +s3Cluster(cluster_name, named_collection[, option=value [,..]]) ``` **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `url` — path to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `NOSIGN` — If this keyword is provided in place of credentials, all the requests will not be signed. +- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension. + +Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported: + + - `filename` — appended to the url if specified. + - `use_environment_credentials` — enabled by default, allows passing extra parameters using environment variables `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI`, `AWS_CONTAINER_CREDENTIALS_FULL_URI`, `AWS_CONTAINER_AUTHORIZATION_TOKEN`, `AWS_EC2_METADATA_DISABLED`. + - `no_sign_request` — disabled by default. + - `expiration_window_seconds` — default value is 120. **Returned value** @@ -47,6 +58,18 @@ Count the total amount of rows in all files in the cluster `cluster_simple`: If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: +For production use cases it is recommended to use [named collections](/docs/en/operations/named-collections.md). 
Here is the example: +``` sql + +CREATE NAMED COLLECTION creds AS + access_key_id = 'minio' + secret_access_key = 'minio123'; +SELECT count(*) FROM s3Cluster( + 'cluster_simple', creds, url='https://s3-object-url.csv', + format='CSV', structure='name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))' +) +``` + **See Also** - [S3 engine](../../engines/table-engines/integrations/s3.md) diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md index a2ddb103bc3..34064b6cf2f 100644 --- a/docs/ru/getting-started/tutorial.md +++ b/docs/ru/getting-started/tutorial.md @@ -670,4 +670,4 @@ ENGINE = ReplicatedMergeTree( INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local; ``` -Репликация работает в режиме мультимастера. Это означает, что данные могут быть загружены на любую из реплик и система автоматически синхронизирует данные между остальными репликами. Репликация асинхронна, то есть в конкретный момент времнени не все реплики могут содержать недавно добавленные данные. Как минимум одна реплика должна быть в строю для приёма данных. Прочие реплики синхронизируются и восстановят согласованное состояния как только снова станут активными. Заметим, что при таком подходе есть вероятность утраты недавно добавленных данных. +Репликация работает в режиме мультимастера. Это означает, что данные могут быть загружены на любую из реплик и система автоматически синхронизирует данные между остальными репликами. Репликация асинхронна, то есть в конкретный момент времени не все реплики могут содержать недавно добавленные данные. Как минимум одна реплика должна быть в строю для приёма данных. Прочие реплики синхронизируются и восстановят согласованное состояния как только снова станут активными. Заметим, что при таком подходе есть вероятность утраты недавно добавленных данных. diff --git a/docs/ru/operations/system-tables/replication_queue.md b/docs/ru/operations/system-tables/replication_queue.md index 60d42133153..31bd0bf50fd 100644 --- a/docs/ru/operations/system-tables/replication_queue.md +++ b/docs/ru/operations/system-tables/replication_queue.md @@ -49,7 +49,7 @@ slug: /ru/operations/system-tables/replication_queue - `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время последней попытки выполнить задачу. -- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество отложенных задач. +- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество откладываний запуска задачи. - `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — причина, по которой была отложена задача. diff --git a/docs/zh/faq/general/ne-tormozit.md b/docs/zh/faq/general/ne-tormozit.md index c4149655108..f397f6bb1d6 100644 --- a/docs/zh/faq/general/ne-tormozit.md +++ b/docs/zh/faq/general/ne-tormozit.md @@ -1,27 +1,27 @@ --- slug: /zh/faq/general/ne-tormozit -title: "What does \u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\ - \u201D mean?" +title: "\u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\ + \u201D 是什么意思?" toc_hidden: true sidebar_position: 11 --- -# What Does “Не тормозит” Mean? {#what-does-ne-tormozit-mean} +# “Не тормозит” 是什么意思? {#what-does-ne-tormozit-mean} -This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front. 
+这个问题通常出现在人们看到官方 ClickHouse T恤时。它们的正面印有大字**“ClickHouse не тормозит”**。 -Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. +在 ClickHouse 开源之前,它作为俄罗斯最大的 IT 公司 [Yandex](https://yandex.com/company/) 的内部存储系统而开发。这就是为什么它最初获得了俄文口号“не тормозит”(发音为“ne tormozit”)。在开源发布后,我们首先为俄罗斯的活动制作了一些这样的T恤,使用原汁原味的口号是理所当然的。 -One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it. +其中一批这样的T恤原本打算在俄罗斯之外的活动中赠送,我们尝试制作口号的英文版本。不幸的是,俄语在表达方面有些优雅,而且T恤上的空间有限,所以我们未能提出足够好的翻译(大多数选项要么太长,要么不够准确),并决定即使在为国际活动制作的T恤上也保留俄文口号。这被证明是一个绝妙的决定,因为全世界的人们看到它时都会感到惊喜和好奇。 -So, what does it mean? Here are some ways to translate *“не тормозит”*: +那么,它是什么意思呢?以下是翻译“не тормозит”的一些方式: -- If you translate it literally, it’d be something like *“ClickHouse does not press the brake pedal”*. -- If you’d want to express it as close to how it sounds to a Russian person with IT background, it’d be something like *“If your larger system lags, it’s not because it uses ClickHouse”*. -- Shorter, but not so precise versions could be *“ClickHouse is not slow”*, *“ClickHouse does not lag”* or just *“ClickHouse is fast”*. +- 如果你直译,那就是“ClickHouse 不踩刹车”。 +- 如果你想尽可能接近一个有 IT 背景的俄罗斯人的听觉感受,那就是“如果你的大型系统延迟,不是因为它使用了 ClickHouse”。 +- 更短,但不那么精确的版本可能是“ClickHouse 不慢”,“ClickHouse 不卡顿”或仅仅“ClickHouse 很快”。 -If you haven’t seen one of those t-shirts in person, you can check them out online in many ClickHouse-related videos. For example, this one: +如果您还没有亲眼见过这些 T恤,可以在许多与 ClickHouse 相关的视频中在线查看。例如,这个: ![iframe](https://www.youtube.com/embed/bSyQahMVZ7w) -P.S. These t-shirts are not for sale, they are given away for free on most [ClickHouse Meetups](https://clickhouse.com/#meet), usually for best questions or other forms of active participation. +附言:这些 T恤不出售,它们在大多数 [ClickHouse 聚会](https://clickhouse.com/#meet)上免费赠送,通常是给出最佳问题或其他形式的积极参与者。 diff --git a/docs/zh/faq/general/why-clickhouse-is-so-fast.md b/docs/zh/faq/general/why-clickhouse-is-so-fast.md index a7df6aec207..ddfda87abb4 100644 --- a/docs/zh/faq/general/why-clickhouse-is-so-fast.md +++ b/docs/zh/faq/general/why-clickhouse-is-so-fast.md @@ -1,63 +1,63 @@ --- slug: /zh/faq/general/why-clickhouse-is-so-fast -title: Why is ClickHouse so fast? +title: 为什么 ClickHouse 如此快速? toc_hidden: true sidebar_position: 8 --- -# Why ClickHouse Is So Fast? {#why-clickhouse-is-so-fast} +# 为什么 ClickHouse 如此快速? {#why-clickhouse-is-so-fast} -It was designed to be fast. Query execution performance has always been a top priority during the development process, but other important characteristics like user-friendliness, scalability, and security were also considered so ClickHouse could become a real production system. 
+它被设计成一个快速的系统。在开发过程中,查询执行性能一直是首要考虑的优先级,但也考虑了其他重要特性,如用户友好性、可扩展性和安全性,使 ClickHouse 成为一个真正的生产系统。 -ClickHouse was initially built as a prototype to do just a single task well: to filter and aggregate data as fast as possible. That’s what needs to be done to build a typical analytical report and that’s what a typical [GROUP BY](../../sql-reference/statements/select/group-by.md) query does. ClickHouse team has made several high-level decisions that combined made achieving this task possible: +ClickHouse 最初是作为一个原型构建的,它的单一任务就是尽可能快速地过滤和聚合数据。这正是构建典型分析报告所需做的,也是典型 [GROUP BY](../../sql-reference/statements/select/group-by.md) 查询所做的。ClickHouse 团队做出了几个高层次的决策,这些决策组合在一起使得实现这一任务成为可能: -Column-oriented storage -: Source data often contain hundreds or even thousands of columns, while a report can use just a few of them. The system needs to avoid reading unnecessary columns, or most expensive disk read operations would be wasted. +列式存储 +: 源数据通常包含数百甚至数千列,而报告可能只使用其中的几列。系统需要避免读取不必要的列,否则大部分昂贵的磁盘读取操作将被浪费。 -Indexes -: ClickHouse keeps data structures in memory that allows reading not only used columns but only necessary row ranges of those columns. +索引 +: ClickHouse 在内存中保留数据结构,允许不仅读取使用的列,而且只读取这些列的必要行范围。 -Data compression -: Storing different values of the same column together often leads to better compression ratios (compared to row-oriented systems) because in real data column often has the same or not so many different values for neighboring rows. In addition to general-purpose compression, ClickHouse supports [specialized codecs](../../sql-reference/statements/create/table.mdx/#create-query-specialized-codecs) that can make data even more compact. +数据压缩 +: 将同一列的不同值存储在一起通常会导致更好的压缩比(与行式系统相比),因为在实际数据中列通常对相邻行有相同或不太多的不同值。除了通用压缩之外,ClickHouse 还支持 [专用编解码器](../../sql-reference/statements/create/table.mdx/#create-query-specialized-codecs),可以使数据更加紧凑。 -Vectorized query execution -: ClickHouse not only stores data in columns but also processes data in columns. It leads to better CPU cache utilization and allows for [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU instructions usage. +向量化查询执行 +: ClickHouse 不仅以列的形式存储数据,而且以列的形式处理数据。这导致更好的 CPU 缓存利用率,并允许使用 [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU 指令。 -Scalability -: ClickHouse can leverage all available CPU cores and disks to execute even a single query. Not only on a single server but all CPU cores and disks of a cluster as well. +可扩展性 +: ClickHouse 可以利用所有可用的 CPU 核心和磁盘来执行甚至是单个查询。不仅在单个服务器上,而且在集群的所有 CPU 核心和磁盘上。 -But many other database management systems use similar techniques. What really makes ClickHouse stand out is **attention to low-level details**. Most programming languages provide implementations for most common algorithms and data structures, but they tend to be too generic to be effective. Every task can be considered as a landscape with various characteristics, instead of just throwing in random implementation. For example, if you need a hash table, here are some key questions to consider: +但许多其他数据库管理系统也使用类似的技术。真正使 ClickHouse 脱颖而出的是 **对底层细节的关注**。大多数编程语言为最常见的算法和数据结构提供了实现,但它们往往过于通用而无法高效。每个任务都可以被视为具有各种特征的景观,而不是仅仅随意投入某个实现。例如,如果您需要一个哈希表,这里有一些关键问题需要考虑: -- Which hash function to choose? -- Collision resolution algorithm: [open addressing](https://en.wikipedia.org/wiki/Open_addressing) vs [chaining](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)? -- Memory layout: one array for keys and values or separate arrays? Will it store small or large values? -- Fill factor: when and how to resize? How to move values around on resize? 
-- Will values be removed and which algorithm will work better if they will? -- Will we need fast probing with bitmaps, inline placement of string keys, support for non-movable values, prefetch, and batching? +- 选择哪种哈希函数? +- 冲突解决算法:[开放寻址](https://en.wikipedia.org/wiki/Open_addressing)还是[链接](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)? +- 内存布局:一个数组用于键和值还是分开的数组?它会存储小值还是大值? +- 填充因子:何时以及如何调整大小?在调整大小时如何移动值? +- 是否会移除值,如果会,哪种算法会更好? +- 我们是否需要使用位图进行快速探测,字符串键的内联放置,对不可移动值的支持,预取和批处理? -Hash table is a key data structure for `GROUP BY` implementation and ClickHouse automatically chooses one of [30+ variations](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) for each specific query. +哈希表是 `GROUP BY` 实现的关键数据结构,ClickHouse 会根据每个特定查询自动选择 [30 多种变体](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) 中的一种。 -The same goes for algorithms, for example, in sorting you might consider: +算法也是如此,例如,在排序中,您可能会考虑: -- What will be sorted: an array of numbers, tuples, strings, or structures? -- Is all data available completely in RAM? -- Do we need a stable sort? -- Do we need a full sort? Maybe partial sort or n-th element will suffice? -- How to implement comparisons? -- Are we sorting data that has already been partially sorted? +- 将要排序的是数字数组、元组、字符串还是结构? +- 所有数据是否完全可用于 RAM? +- 我们需要稳定排序吗? +- 我们需要完全排序吗?也许部分排序或第 n 个元素就足够了? +- 如何实现比较? +- 我们正在对已经部分排序的数据进行排序吗? -Algorithms that they rely on characteristics of data they are working with can often do better than their generic counterparts. If it is not really known in advance, the system can try various implementations and choose the one that works best in runtime. For example, see an [article on how LZ4 decompression is implemented in ClickHouse](https://habr.com/en/company/yandex/blog/457612/). +他们所依赖的算法根据其所处理的数据特性,往往可以比通用算法做得更好。如果事先真的不知道,系统可以尝试各种实现,并在运行时选择最佳的一种。例如,看一篇关于 [ClickHouse 中 LZ4 解压缩是如何实现的文章](https://habr.com/en/company/yandex/blog/457612/)。 -Last but not least, the ClickHouse team always monitors the Internet on people claiming that they came up with the best implementation, algorithm, or data structure to do something and tries it out. Those claims mostly appear to be false, but from time to time you’ll indeed find a gem. +最后但同样重要的是,ClickHouse 团队始终关注互联网上人们声称他们提出了最佳的实现、算法或数据结构来做某事,并尝试它。这些声称大多是虚假的,但有时你确实会找到一颗宝石。 -:::info Tips for building your own high-performance software -- Keep in mind low-level details when designing your system. -- Design based on hardware capabilities. -- Choose data structures and abstractions based on the needs of the task. -- Provide specializations for special cases. -- Try new, “best” algorithms, that you read about yesterday. -- Choose an algorithm in runtime based on statistics. -- Benchmark on real datasets. -- Test for performance regressions in CI. -- Measure and observe everything. +:::info 构建高性能软件的提示 +- 设计系统时要考虑到底层细节。 +- 基于硬件能力进行设计。 +- 根据任务的需求选择数据结构和抽象。 +- 为特殊情况提供专门化。 +- 尝试您昨天阅读的关于新的“最佳”算法。 +- 根据统计数据在运行时选择算法。 +- 在真实数据集上进行基准测试。 +- 在 CI 中测试性能回归。 +- 测量并观察一切。 ::: diff --git a/docs/zh/faq/integration/json-import.md b/docs/zh/faq/integration/json-import.md index 2d5c687316d..730af8cc6da 100644 --- a/docs/zh/faq/integration/json-import.md +++ b/docs/zh/faq/integration/json-import.md @@ -1,35 +1,35 @@ --- slug: /zh/faq/integration/json-import -title: How to import JSON into ClickHouse? +title: 如何将 JSON 导入到 ClickHouse? toc_hidden: true sidebar_position: 11 --- -# How to Import JSON Into ClickHouse? 
{#how-to-import-json-into-clickhouse} +# 如何将 JSON 导入到 ClickHouse? {#how-to-import-json-into-clickhouse} -ClickHouse supports a wide range of [data formats for input and output](../../interfaces/formats.md). There are multiple JSON variations among them, but the most commonly used for data ingestion is [JSONEachRow](../../interfaces/formats.md#jsoneachrow). It expects one JSON object per row, each object separated by a newline. +ClickHouse 支持多种[输入和输出的数据格式](../../interfaces/formats.md)。其中包括多种 JSON 变体,但最常用于数据导入的是 [JSONEachRow](../../interfaces/formats.md#jsoneachrow)。它期望每行一个 JSON 对象,每个对象由一个新行分隔。 -## Examples {#examples} +## 示例 {#examples} -Using [HTTP interface](../../interfaces/http.md): +使用 [HTTP 接口](../../interfaces/http.md): ``` bash $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test%20FORMAT%20JSONEachRow' --data-binary @- ``` -Using [CLI interface](../../interfaces/cli.md): +使用 [CLI接口](../../interfaces/cli.md): ``` bash $ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow" ``` -Instead of inserting data manually, you might consider to use one of [client libraries](../../interfaces/index.md) instead. +除了手动插入数据外,您可能会考虑使用 [客户端库](../../interfaces/index.md) 之一。 -## Useful Settings {#useful-settings} +## 实用设置 {#useful-settings} -- `input_format_skip_unknown_fields` allows to insert JSON even if there were additional fields not present in table schema (by discarding them). -- `input_format_import_nested_json` allows to insert nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type. +- `input_format_skip_unknown_fields` 允许插入 JSON,即使存在表格架构中未出现的额外字段(通过丢弃它们)。 +- `input_format_import_nested_json` 允许将嵌套 JSON 对象插入到 [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) 类型的列中。 :::note -Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface. +对于 HTTP 接口,设置作为 `GET` 参数指定;对于 `CLI` 接口,则作为前缀为 -- 的附加命令行参数。 ::: \ No newline at end of file diff --git a/docs/zh/faq/integration/oracle-odbc.md b/docs/zh/faq/integration/oracle-odbc.md index e22db1d8960..ca65f08686c 100644 --- a/docs/zh/faq/integration/oracle-odbc.md +++ b/docs/zh/faq/integration/oracle-odbc.md @@ -1,16 +1,16 @@ --- slug: /zh/faq/integration/oracle-odbc -title: What if I have a problem with encodings when using Oracle via ODBC? +title: 使用 Oracle ODBC 时遇到编码问题怎么办? toc_hidden: true sidebar_position: 20 --- -# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings} +# 使用 Oracle ODBC 时遇到编码问题怎么办? {#oracle-odbc-encodings} -If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html). 
+如果您使用 Oracle 作为 ClickHouse 外部字典的数据源,并通过 Oracle ODBC 驱动程序,您需要在 `/etc/default/clickhouse` 中为 `NLS_LANG` 环境变量设置正确的值。更多信息,请参阅 [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html)。 -**Example** +**示例** ``` sql NLS_LANG=RUSSIAN_RUSSIA.UTF8 -``` +``` \ No newline at end of file diff --git a/docs/zh/faq/operations/delete-old-data.md b/docs/zh/faq/operations/delete-old-data.md index 24181116bab..293ba8069fa 100644 --- a/docs/zh/faq/operations/delete-old-data.md +++ b/docs/zh/faq/operations/delete-old-data.md @@ -1,44 +1,44 @@ --- slug: /zh/faq/operations/delete-old-data -title: Is it possible to delete old records from a ClickHouse table? +title: 是否可以从ClickHouse表中删除旧记录? toc_hidden: true sidebar_position: 20 --- -# Is It Possible to Delete Old Records from a ClickHouse Table? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table} +# 是否可以从ClickHouse表中删除旧记录? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table} -The short answer is “yes”. ClickHouse has multiple mechanisms that allow freeing up disk space by removing old data. Each mechanism is aimed for different scenarios. +简短的答案是“可以”。ClickHouse具有多种机制,允许通过删除旧数据来释放磁盘空间。每种机制都针对不同的场景。 ## TTL {#ttl} -ClickHouse allows to automatically drop values when some condition happens. This condition is configured as an expression based on any columns, usually just static offset for any timestamp column. +ClickHouse 允许在某些条件发生时自动删除值。这个条件被配置为基于任何列的表达式,通常只是针对任何时间戳列的静态偏移量。 -The key advantage of this approach is that it does not need any external system to trigger, once TTL is configured, data removal happens automatically in background. +这种方法的主要优势是它不需要任何外部系统来触发,一旦配置了 TTL,数据删除就会自动在后台发生。 :::note -TTL can also be used to move data not only to [/dev/null](https://en.wikipedia.org/wiki/Null_device), but also between different storage systems, like from SSD to HDD. +TTL 也可以用来将数据移动到非 [/dev/null](https://en.wikipedia.org/wiki/Null_device) 的不同存储系统,例如从 SSD 到 HDD。 ::: -More details on [configuring TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). +有关 [配置 TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) 的更多详细信息。 ## ALTER DELETE {#alter-delete} -ClickHouse does not have real-time point deletes like in [OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing) databases. The closest thing to them are mutations. They are issued as `ALTER ... DELETE` or `ALTER ... UPDATE` queries to distinguish from normal `DELETE` or `UPDATE` as they are asynchronous batch operations, not immediate modifications. The rest of syntax after `ALTER TABLE` prefix is similar. +ClickHouse没有像[OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing)数据库那样的实时点删除。最接近的东西是 `Mutation`,执行 `ALTER ... DELETE` 或 `ALTER ... UPDATE` 查询,以区别于普通的`DELETE`或`UPDATE`。因为它们是异步批处理操作,而不是立即修改。`ALTER TABLE`前缀后的其余语法相似。 -`ALTER DELETE` can be issued to flexibly remove old data. If you need to do it regularly, the main downside will be the need to have an external system to submit the query. There are also some performance considerations since mutation rewrite complete parts even there’s only a single row to be deleted. +`ALTER DELETE`可以灵活地用来删除旧数据。如果你需要定期这样做,主要缺点将是需要有一个外部系统来提交查询。还有一些性能方面的考虑,因为即使只有一行要被删除,突变也会重写完整部分。 -This is the most common approach to make your system based on ClickHouse [GDPR](https://gdpr-info.eu)-compliant. 
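A minimal SQL sketch of the two mechanisms described above (TTL and `ALTER ... DELETE`); the table, columns, and filter are hypothetical and only illustrate the shape of the statements:

``` sql
-- Hypothetical table: rows older than 30 days are dropped automatically by TTL.
CREATE TABLE events
(
    event_date Date,
    user_id UInt64,
    payload String
)
ENGINE = MergeTree
ORDER BY (event_date, user_id)
TTL event_date + INTERVAL 30 DAY;

-- ALTER DELETE is an asynchronous mutation, submitted on demand from outside.
ALTER TABLE events DELETE WHERE user_id = 42;

-- Mutations run in the background; their progress can be checked here.
SELECT command, is_done FROM system.mutations WHERE table = 'events';
```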
+这是使基于ClickHouse的系统符合[GDPR](https://gdpr-info.eu)的最常见方法。 -More details on [mutations](../../sql-reference/statements/alter.md/#alter-mutations). +有关[mutations](../../sql-reference/statements/alter.md/#alter-mutations)的更多详细信息。 ## DROP PARTITION {#drop-partition} -`ALTER TABLE ... DROP PARTITION` provides a cost-efficient way to drop a whole partition. It’s not that flexible and needs proper partitioning scheme configured on table creation, but still covers most common cases. Like mutations need to be executed from an external system for regular use. +`ALTER TABLE ... DROP PARTITION`提供了一种成本效率高的方式来删除整个分区。它不是那么灵活,需要在创建表时配置适当的分区方案,但仍然涵盖了大多数常见情况。像 mutations 一样,需要从外部系统执行以进行常规使用。 -More details on [manipulating partitions](../../sql-reference/statements/alter/partition.mdx/#alter_drop-partition). +有关[操作分区](../../sql-reference/statements/alter/partition.mdx/#alter_drop-partition)的更多详细信息。 ## TRUNCATE {#truncate} -It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need. +从表中删除所有数据是相当激进的,但在某些情况下可能正是您所需要的。 -More details on [table truncation](../../sql-reference/statements/truncate.md). \ No newline at end of file +有关[truncate](../../sql-reference/statements/truncate.md)的更多详细信息。 diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8ffe56b390c..8281afbf320 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -76,8 +76,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include "MetricsTransmitter.h" @@ -2004,6 +2004,12 @@ try else LOG_INFO(log, "Closed all listening sockets."); + /// Wait for unfinished backups and restores. + /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries + /// (because killAllQueries() will cancel all running backups/restores). + if (server_settings.shutdown_wait_backups_and_restores) + global_context->waitAllBackupsAndRestores(); + /// Killing remaining queries. 
if (!server_settings.shutdown_wait_unfinished_queries) global_context->getProcessList().killAllQueries(); diff --git a/programs/server/config.xml b/programs/server/config.xml index e1428b17084..0dc271692b8 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -440,6 +440,9 @@ 10000 + + /var/lib/clickhouse/caches/ + false diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 981a7aafc6f..351bcb95c73 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -242,7 +242,10 @@ HTTPAuthClientParams parseHTTPAuthParams(const Poco::Util::AbstractConfiguration size_t connection_timeout_ms = config.getInt(prefix + ".connection_timeout_ms", 1000); size_t receive_timeout_ms = config.getInt(prefix + ".receive_timeout_ms", 1000); size_t send_timeout_ms = config.getInt(prefix + ".send_timeout_ms", 1000); - http_auth_params.timeouts = ConnectionTimeouts{connection_timeout_ms, receive_timeout_ms, send_timeout_ms}; + http_auth_params.timeouts = ConnectionTimeouts() + .withConnectionTimeout(Poco::Timespan(connection_timeout_ms * 1000)) + .withReceiveTimeout(Poco::Timespan(receive_timeout_ms * 1000)) + .withSendTimeout(Poco::Timespan(send_timeout_ms * 1000)); http_auth_params.max_tries = config.getInt(prefix + ".max_tries", 3); http_auth_params.retry_initial_backoff_ms = config.getInt(prefix + ".retry_initial_backoff_ms", 50); diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index f20fb8cb933..8ac75e4451c 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -350,7 +350,7 @@ struct Adder if constexpr (Data::is_able_to_parallelize_merge) { - if (data.set.isSingleLevel() && data.set.size() > 100'000) + if (data.set.isSingleLevel() && data.set.worthConvertingToTwoLevel(data.set.size())) data.set.convertToTwoLevel(); } } diff --git a/src/AggregateFunctions/UniqExactSet.h b/src/AggregateFunctions/UniqExactSet.h index 06157405cc5..e8c0de660ff 100644 --- a/src/AggregateFunctions/UniqExactSet.h +++ b/src/AggregateFunctions/UniqExactSet.h @@ -11,10 +11,16 @@ namespace DB { +namespace ErrorCodes +{ +extern const int TOO_LARGE_ARRAY_SIZE; +} + template class UniqExactSet { static_assert(std::is_same_v); + static_assert(std::is_same_v); public: using value_type = typename SingleLevelSet::value_type; @@ -147,7 +153,31 @@ public: } } - void read(ReadBuffer & in) { asSingleLevel().read(in); } + void read(ReadBuffer & in) + { + size_t new_size = 0; + auto * const position = in.position(); + readVarUInt(new_size, in); + if (new_size > 100'000'000'000) + throw DB::Exception( + DB::ErrorCodes::TOO_LARGE_ARRAY_SIZE, "The size of serialized hash table is suspiciously large: {}", new_size); + + if (worthConvertingToTwoLevel(new_size)) + { + two_level_set = std::make_shared(new_size); + for (size_t i = 0; i < new_size; ++i) + { + typename SingleLevelSet::Cell x; + x.read(in); + asTwoLevel().insert(x.getValue()); + } + } + else + { + in.position() = position; // Rollback position + asSingleLevel().read(in); + } + } void write(WriteBuffer & out) const { @@ -166,6 +196,8 @@ public: return two_level_set ? 
two_level_set : std::make_shared(asSingleLevel()); } + static bool worthConvertingToTwoLevel(size_t size) { return size > 100'000; } + void convertToTwoLevel() { two_level_set = getTwoLevelSet(); diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index 1f4ec36a8fa..f3bf41cb1e9 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -236,7 +236,7 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const new_options.add_cast_for_constants = false; /// Avoid cast for `IN tuple(...)` expression. - /// Tuples colud be quite big, and adding a type may significantly increase query size. + /// Tuples could be quite big, and adding a type may significantly increase query size. /// It should be safe because set type for `column IN tuple` is deduced from `column` type. if (isNameOfInFunction(function_name) && argument_nodes.size() > 1 && argument_nodes[1]->getNodeType() == QueryTreeNodeType::CONSTANT) new_options.add_cast_for_constants = false; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index c71eb9e3aca..e33c6565321 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1208,6 +1208,8 @@ private: static void validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope); + static void checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope); + static std::pair recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into); static void expandGroupByAll(QueryNode & query_tree_node_typed); @@ -2244,12 +2246,16 @@ void QueryAnalyzer::validateJoinTableExpressionWithoutAlias(const QueryTreeNodeP if (table_expression_has_alias) return; + if (join_node->as().getKind() == JoinKind::Paste) + return; + auto * query_node = table_expression_node->as(); auto * union_node = table_expression_node->as(); if ((query_node && !query_node->getCTEName().empty()) || (union_node && !union_node->getCTEName().empty())) return; auto table_expression_node_type = table_expression_node->getNodeType(); + if (table_expression_node_type == QueryTreeNodeType::TABLE_FUNCTION || table_expression_node_type == QueryTreeNodeType::QUERY || table_expression_node_type == QueryTreeNodeType::UNION) @@ -6855,6 +6861,39 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif } } +void QueryAnalyzer::checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope) +{ + Names column_names; + if (!scope.context->getSettingsRef().joined_subquery_requires_alias) + return; + + if (join_node->as().getKind() != JoinKind::Paste) + return; + + auto * left_node = left_table_expr->as(); + auto * right_node = right_table_expr->as(); + + if (!left_node && !right_node) + return; + + if (left_node) + for (const auto & name_and_type : left_node->getProjectionColumns()) + column_names.push_back(name_and_type.name); + if (right_node) + for (const auto & name_and_type : right_node->getProjectionColumns()) + column_names.push_back(name_and_type.name); + + if (column_names.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Names of projection columns cannot be empty"); + + std::sort(column_names.begin(), column_names.end()); 
+ for (size_t i = 0; i < column_names.size() - 1; i++) // Check if there is no any duplicates because it will lead to broken result + if (column_names[i] == column_names[i+1]) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Name of columns and aliases should be unique for this query (you can add/change aliases to avoid duplication)" + "While processing '{}'", join_node->formatASTForErrorMessage()); +} + /// Resolve join node in scope void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveScope & scope, QueryExpressionsAliasVisitor & expressions_visitor) { @@ -6866,6 +6905,9 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS resolveQueryJoinTreeNode(join_node_typed.getRightTableExpression(), scope, expressions_visitor); validateJoinTableExpressionWithoutAlias(join_node, join_node_typed.getRightTableExpression(), scope); + if (!join_node_typed.getLeftTableExpression()->hasAlias() && !join_node_typed.getRightTableExpression()->hasAlias()) + checkDuplicateTableNamesOrAlias(join_node, join_node_typed.getLeftTableExpression(), join_node_typed.getRightTableExpression(), scope); + if (join_node_typed.isOnJoinExpression()) { expressions_visitor.visit(join_node_typed.getJoinExpression()); diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp new file mode 100644 index 00000000000..7887a1b7175 --- /dev/null +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -0,0 +1,134 @@ +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +/** Rewrites `sum(column +/- literal)` into two individual functions + * `sum(column)` and `literal * count(column)`. + * sum(column + literal) -> sum(column) + literal * count(column) + * sum(literal + column) -> literal * count(column) + sum(column) + * sum(column - literal) -> sum(column) - literal * count(column) + * sum(literal - column) -> literal * count(column) - sum(column) + */ + +namespace +{ + +class RewriteSumFunctionWithSumAndCountVisitor : public InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + void enterImpl(QueryTreeNodePtr & node) + { + static const std::unordered_set func_supported = { + "plus", + "minus" + }; + + auto * function_node = node->as(); + if (!function_node || Poco::toLower(function_node->getFunctionName()) != "sum") + return; + + const auto & function_nodes = function_node->getArguments().getNodes(); + if (function_nodes.size() != 1) + return; + + const auto * func_plus_minus_node = function_nodes[0]->as(); + if (!func_plus_minus_node || !func_supported.contains(Poco::toLower(func_plus_minus_node->getFunctionName()))) + return; + + const auto & func_plus_minus_nodes = func_plus_minus_node->getArguments().getNodes(); + if (func_plus_minus_nodes.size() != 2) + return; + + size_t column_id; + if (func_plus_minus_nodes[0]->as() && func_plus_minus_nodes[1]->as()) + column_id = 0; + else if (func_plus_minus_nodes[0]->as() && func_plus_minus_nodes[1]->as()) + column_id = 1; + else + return; + + size_t literal_id = 1 - column_id; + const auto * literal = func_plus_minus_nodes[literal_id]->as(); + if (!literal) + return; + + const auto literal_type = literal->getResultType(); + if (!literal_type || !WhichDataType(literal_type).isNumber()) + return; + + const auto * column_node = func_plus_minus_nodes[column_id]->as(); + if (!column_node) + return; + + const auto column_type = 
column_node->getColumnType(); + if (!column_type || !isNumber(column_type)) + return; + + const auto lhs = std::make_shared("sum"); + lhs->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); + resolveAsAggregateFunctionNode(*lhs, column_type); + + const auto rhs_count = std::make_shared("count"); + rhs_count->getArguments().getNodes().push_back(func_plus_minus_nodes[column_id]); + resolveAsAggregateFunctionNode(*rhs_count, column_type); + + const auto rhs = std::make_shared("multiply"); + rhs->getArguments().getNodes().push_back(func_plus_minus_nodes[literal_id]); + rhs->getArguments().getNodes().push_back(rhs_count); + resolveOrdinaryFunctionNode(*rhs, rhs->getFunctionName()); + + const auto new_node = std::make_shared(Poco::toLower(func_plus_minus_node->getFunctionName())); + if (column_id == 0) + new_node->getArguments().getNodes() = {lhs, rhs}; + else if (column_id == 1) + new_node->getArguments().getNodes() = {rhs, lhs}; + resolveOrdinaryFunctionNode(*new_node, new_node->getFunctionName()); + + if (!new_node) + return; + + node = new_node; + + } + +private: + void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + { + const auto function = FunctionFactory::instance().get(function_name, getContext()); + function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); + } + + static inline void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) + { + AggregateFunctionProperties properties; + const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(), + NullsAction::EMPTY, + {argument_type}, + {}, + properties); + + function_node.resolveAsAggregateFunction(aggregate_function); + } + +}; + +} + +void RewriteSumFunctionWithSumAndCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) +{ + RewriteSumFunctionWithSumAndCountVisitor visitor(std::move(context)); + visitor.visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.h b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.h new file mode 100644 index 00000000000..cb76448b34a --- /dev/null +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.h @@ -0,0 +1,19 @@ +#pragma once + +#include + +namespace DB +{ + +class RewriteSumFunctionWithSumAndCountPass final : public IQueryTreePass +{ +public: + String getName() override { return "RewriteSumFunctionWithSumAndCountPass"; } + + String getDescription() override { return "Rewrite sum(column +/- literal) into sum(column) and literal * count(column)"; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; + +}; + +} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 58fd1d5937b..d2b90419a8b 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -256,6 +257,7 @@ void addQueryTreePasses(QueryTreePassManager & manager) manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index b659887e0da..4662f436aba 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -162,7 +162,8 @@ 
BackupCoordinationRemote::BackupCoordinationRemote( const Strings & all_hosts_, const String & current_host_, bool plain_backup_, - bool is_internal_) + bool is_internal_, + QueryStatusPtr process_list_element_) : root_zookeeper_path(root_zookeeper_path_) , zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_) , keeper_settings(keeper_settings_) @@ -177,6 +178,7 @@ BackupCoordinationRemote::BackupCoordinationRemote( log, get_zookeeper_, keeper_settings, + process_list_element_, [my_zookeeper_path = zookeeper_path, my_current_host = current_host, my_is_internal = is_internal] (WithRetries::FaultyKeeper & zk) { diff --git a/src/Backups/BackupCoordinationRemote.h b/src/Backups/BackupCoordinationRemote.h index f3b6bff3a68..81980ee5637 100644 --- a/src/Backups/BackupCoordinationRemote.h +++ b/src/Backups/BackupCoordinationRemote.h @@ -30,7 +30,8 @@ public: const Strings & all_hosts_, const String & current_host_, bool plain_backup_, - bool is_internal_); + bool is_internal_, + QueryStatusPtr process_list_element_); ~BackupCoordinationRemote() override; diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 564a518689a..bf9cf50a67a 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -87,6 +87,7 @@ BackupEntriesCollector::BackupEntriesCollector( , backup_coordination(backup_coordination_) , read_settings(read_settings_) , context(context_) + , process_list_element(context->getProcessListElement()) , on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000)) , collect_metadata_timeout(context->getConfigRef().getUInt64( "backups.collect_metadata_timeout", context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 600000))) @@ -158,8 +159,9 @@ BackupEntries BackupEntriesCollector::run() Strings BackupEntriesCollector::setStage(const String & new_stage, const String & message) { LOG_TRACE(log, "Setting stage: {}", new_stage); - current_stage = new_stage; + checkIsQueryCancelled(); + current_stage = new_stage; backup_coordination->setStage(new_stage, message); if (new_stage == Stage::formatGatheringMetadata(0)) @@ -179,6 +181,12 @@ Strings BackupEntriesCollector::setStage(const String & new_stage, const String } } +void BackupEntriesCollector::checkIsQueryCancelled() const +{ + if (process_list_element) + process_list_element->checkTimeLimit(); +} + /// Calculates the root path for collecting backup entries, /// it's either empty or has the format "shards//replicas//". void BackupEntriesCollector::calculateRootPathInBackup() @@ -413,6 +421,8 @@ void BackupEntriesCollector::gatherDatabaseMetadata( bool all_tables, const std::set & except_table_names) { + checkIsQueryCancelled(); + auto it = database_infos.find(database_name); if (it == database_infos.end()) { @@ -491,6 +501,8 @@ void BackupEntriesCollector::gatherDatabaseMetadata( void BackupEntriesCollector::gatherTablesMetadata() { + checkIsQueryCancelled(); + table_infos.clear(); for (const auto & [database_name, database_info] : database_infos) { @@ -552,6 +564,8 @@ std::vector> BackupEntriesCollector::findTablesInD const auto & database_info = database_infos.at(database_name); const auto & database = database_info.database; + checkIsQueryCancelled(); + auto filter_by_table_name = [my_database_info = &database_info](const String & table_name) { /// We skip inner tables of materialized views. 
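Since `checkIsQueryCancelled()` simply delegates to `QueryStatus::checkTimeLimit()`, these checks should make a metadata-heavy BACKUP react to `KILL QUERY` and to the ordinary `max_execution_time` limit while entries are still being collected. A hedged sketch (the database and disk names are hypothetical):

``` sql
-- Hypothetical names; the limit applies to the BACKUP query itself.
SET max_execution_time = 600;
BACKUP DATABASE default TO Disk('backups', 'default_db.zip');
-- If the query is killed or the limit is exceeded during metadata collection,
-- the operation stops at the next cancellation check instead of running to completion.
```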
@@ -629,8 +643,12 @@ void BackupEntriesCollector::lockTablesForReading() for (auto & [table_name, table_info] : table_infos) { auto storage = table_info.storage; - if (storage) - table_info.table_lock = storage->tryLockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); + if (!storage) + continue; + + checkIsQueryCancelled(); + + table_info.table_lock = storage->tryLockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); } std::erase_if( @@ -734,6 +752,7 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() continue; /// We store CREATE DATABASE queries only if there was BACKUP DATABASE specified. LOG_TRACE(log, "Adding the definition of database {} to backup", backQuoteIfNeed(database_name)); + checkIsQueryCancelled(); ASTPtr new_create_query = database_info.create_database_query; adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), nullptr); @@ -750,6 +769,7 @@ void BackupEntriesCollector::makeBackupEntriesForTablesDefs() for (auto & [table_name, table_info] : table_infos) { LOG_TRACE(log, "Adding the definition of {} to backup", tableNameWithTypeToString(table_name.database, table_name.table, false)); + checkIsQueryCancelled(); ASTPtr new_create_query = table_info.create_table_query; adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), &table_info.replicated_table_shared_id); @@ -802,6 +822,7 @@ void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableN } LOG_TRACE(log, "Collecting data of {} for backup", tableNameWithTypeToString(table_name.database, table_name.table, false)); + checkIsQueryCancelled(); try { @@ -861,13 +882,17 @@ void BackupEntriesCollector::addPostTask(std::function task) void BackupEntriesCollector::runPostTasks() { LOG_TRACE(log, "Will run {} post tasks", post_tasks.size()); + /// Post collecting tasks can add other post collecting tasks, our code is fine with that. while (!post_tasks.empty()) { + checkIsQueryCancelled(); + auto task = std::move(post_tasks.front()); post_tasks.pop(); std::move(task)(); } + LOG_TRACE(log, "All post tasks successfully executed"); } diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 45d1ba1652a..fcbc5e5985f 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -22,6 +22,9 @@ class IDatabase; using DatabasePtr = std::shared_ptr; struct StorageID; enum class AccessEntityType; +class QueryStatus; +using QueryStatusPtr = std::shared_ptr; + /// Collects backup entries for all databases and tables which should be put to a backup. class BackupEntriesCollector : private boost::noncopyable @@ -97,11 +100,15 @@ private: Strings setStage(const String & new_stage, const String & message = ""); + /// Throws an exception if the BACKUP query was cancelled. + void checkIsQueryCancelled() const; + const ASTBackupQuery::Elements backup_query_elements; const BackupSettings backup_settings; std::shared_ptr backup_coordination; const ReadSettings read_settings; ContextPtr context; + QueryStatusPtr process_list_element; /// The time a BACKUP ON CLUSTER or RESTORE ON CLUSTER command will wait until all the nodes receive the BACKUP (or RESTORE) query and start working. /// This setting is similar to `distributed_ddl_task_timeout`. 
diff --git a/src/Backups/BackupEntryFromMemory.h b/src/Backups/BackupEntryFromMemory.h index d8bc0eb966d..2e942989464 100644 --- a/src/Backups/BackupEntryFromMemory.h +++ b/src/Backups/BackupEntryFromMemory.h @@ -17,7 +17,12 @@ public: std::unique_ptr getReadBuffer(const ReadSettings &) const override; UInt64 getSize() const override { return data.size(); } - DataSourceDescription getDataSourceDescription() const override { return DataSourceDescription{DataSourceType::RAM, "", false, false}; } + DataSourceDescription getDataSourceDescription() const override + { + DataSourceDescription res; + res.type = DataSourceType::RAM; + return res; + } private: const String data; diff --git a/src/Backups/BackupFileInfo.cpp b/src/Backups/BackupFileInfo.cpp index 2f3942f89c4..2a1642f3b84 100644 --- a/src/Backups/BackupFileInfo.cpp +++ b/src/Backups/BackupFileInfo.cpp @@ -7,6 +7,8 @@ #include #include #include +#include + #include @@ -203,7 +205,7 @@ BackupFileInfo buildFileInfoForBackupEntry( return info; } -BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool) +BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool, QueryStatusPtr process_list_element) { BackupFileInfos infos; infos.resize(backup_entries.size()); @@ -225,7 +227,7 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr ++num_active_jobs; } - auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &read_settings, &base_backup, &thread_group, i, log]() + auto job = [&mutex, &num_active_jobs, &event, &exception, &infos, &backup_entries, &read_settings, &base_backup, &thread_group, &process_list_element, i, log]() { SCOPE_EXIT_SAFE({ std::lock_guard lock{mutex}; @@ -250,6 +252,9 @@ BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entr return; } + if (process_list_element) + process_list_element->checkTimeLimit(); + infos[i] = buildFileInfoForBackupEntry(name, entry, base_backup, read_settings, log); } catch (...) diff --git a/src/Backups/BackupFileInfo.h b/src/Backups/BackupFileInfo.h index 009fee091e0..15c38595002 100644 --- a/src/Backups/BackupFileInfo.h +++ b/src/Backups/BackupFileInfo.h @@ -14,6 +14,8 @@ using BackupPtr = std::shared_ptr; using BackupEntryPtr = std::shared_ptr; using BackupEntries = std::vector>; struct ReadSettings; +class QueryStatus; +using QueryStatusPtr = std::shared_ptr; /// Information about a file stored in a backup. @@ -78,6 +80,6 @@ using BackupFileInfos = std::vector; BackupFileInfo buildFileInfoForBackupEntry(const String & file_name, const BackupEntryPtr & backup_entry, const BackupPtr & base_backup, const ReadSettings & read_settings, Poco::Logger * log); /// Builds a vector of BackupFileInfos for specified backup entries. 
-BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool); +BackupFileInfos buildFileInfosForBackupEntries(const BackupEntries & backup_entries, const BackupPtr & base_backup, const ReadSettings & read_settings, ThreadPool & thread_pool, QueryStatusPtr process_list_element); } diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index d143d813a2f..381c58dd045 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -126,7 +126,7 @@ BackupReaderS3::BackupReaderS3( const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupReaderS3")) , s3_uri(s3_uri_) - , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString())) { auto & request_settings = s3_settings.request_settings; @@ -216,7 +216,7 @@ BackupWriterS3::BackupWriterS3( const ContextPtr & context_) : BackupWriterDefault(read_settings_, write_settings_, &Poco::Logger::get("BackupWriterS3")) , s3_uri(s3_uri_) - , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString())) { auto & request_settings = s3_settings.request_settings; diff --git a/src/Backups/BackupStatus.cpp b/src/Backups/BackupStatus.cpp index 53adaa577ea..1eefc54f746 100644 --- a/src/Backups/BackupStatus.cpp +++ b/src/Backups/BackupStatus.cpp @@ -21,12 +21,16 @@ std::string_view toString(BackupStatus backup_status) return "BACKUP_CREATED"; case BackupStatus::BACKUP_FAILED: return "BACKUP_FAILED"; + case BackupStatus::BACKUP_CANCELLED: + return "BACKUP_CANCELLED"; case BackupStatus::RESTORING: return "RESTORING"; case BackupStatus::RESTORED: return "RESTORED"; case BackupStatus::RESTORE_FAILED: return "RESTORE_FAILED"; + case BackupStatus::RESTORE_CANCELLED: + return "RESTORE_CANCELLED"; default: break; } diff --git a/src/Backups/BackupStatus.h b/src/Backups/BackupStatus.h index 0afe6efe899..aeec1417b54 100644 --- a/src/Backups/BackupStatus.h +++ b/src/Backups/BackupStatus.h @@ -18,6 +18,10 @@ enum class BackupStatus RESTORED, RESTORE_FAILED, + /// Statuses used after a BACKUP or RESTORE operation was cancelled. 
+ BACKUP_CANCELLED, + RESTORE_CANCELLED, + MAX, }; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index bb8d3d3cc60..9c1b6d8af97 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -45,6 +45,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int CONCURRENT_ACCESS_NOT_SUPPORTED; + extern const int QUERY_WAS_CANCELLED; } using OperationID = BackupOperationID; @@ -73,7 +74,8 @@ namespace all_hosts, backup_settings.host_id, !backup_settings.deduplicate_files, - backup_settings.internal); + backup_settings.internal, + context->getProcessListElement()); } else { @@ -110,7 +112,8 @@ namespace toString(*restore_settings.restore_uuid), all_hosts, restore_settings.host_id, - restore_settings.internal); + restore_settings.internal, + context->getProcessListElement()); } else { @@ -150,17 +153,52 @@ namespace } } - bool isFinalStatus(BackupStatus status) + bool isFinishedSuccessfully(BackupStatus status) { - return (status == BackupStatus::BACKUP_CREATED) || (status == BackupStatus::BACKUP_FAILED) || (status == BackupStatus::RESTORED) - || (status == BackupStatus::RESTORE_FAILED); + return (status == BackupStatus::BACKUP_CREATED) || (status == BackupStatus::RESTORED); } - bool isErrorStatus(BackupStatus status) + bool isFailed(BackupStatus status) { return (status == BackupStatus::BACKUP_FAILED) || (status == BackupStatus::RESTORE_FAILED); } + bool isCancelled(BackupStatus status) + { + return (status == BackupStatus::BACKUP_CANCELLED) || (status == BackupStatus::RESTORE_CANCELLED); + } + + bool isFailedOrCancelled(BackupStatus status) + { + return isFailed(status) || isCancelled(status); + } + + bool isFinalStatus(BackupStatus status) + { + return isFinishedSuccessfully(status) || isFailedOrCancelled(status); + } + + bool isBackupStatus(BackupStatus status) + { + return (status == BackupStatus::CREATING_BACKUP) || (status == BackupStatus::BACKUP_CREATED) || (status == BackupStatus::BACKUP_FAILED) || (status == BackupStatus::BACKUP_CANCELLED); + } + + BackupStatus getBackupStatusFromCurrentException() + { + if (getCurrentExceptionCode() == ErrorCodes::QUERY_WAS_CANCELLED) + return BackupStatus::BACKUP_CANCELLED; + else + return BackupStatus::BACKUP_FAILED; + } + + BackupStatus getRestoreStatusFromCurrentException() + { + if (getCurrentExceptionCode() == ErrorCodes::QUERY_WAS_CANCELLED) + return BackupStatus::RESTORE_CANCELLED; + else + return BackupStatus::RESTORE_FAILED; + } + /// Used to change num_active_backups. 
size_t getNumActiveBackupsChange(BackupStatus status) { @@ -337,13 +375,15 @@ private: }; -BackupsWorker::BackupsWorker(ContextPtr global_context, size_t num_backup_threads, size_t num_restore_threads, bool allow_concurrent_backups_, bool allow_concurrent_restores_) +BackupsWorker::BackupsWorker(ContextMutablePtr global_context, size_t num_backup_threads, size_t num_restore_threads, bool allow_concurrent_backups_, bool allow_concurrent_restores_, bool test_inject_sleep_) : thread_pools(std::make_unique(num_backup_threads, num_restore_threads)) , allow_concurrent_backups(allow_concurrent_backups_) , allow_concurrent_restores(allow_concurrent_restores_) + , test_inject_sleep(test_inject_sleep_) , log(&Poco::Logger::get("BackupsWorker")) + , backup_log(global_context->getBackupLog()) + , process_list(global_context->getProcessList()) { - backup_log = global_context->getBackupLog(); } @@ -400,7 +440,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context try { - addInfo(backup_id, backup_name_for_logging, base_backup_name, backup_settings.internal, BackupStatus::CREATING_BACKUP); + addInfo(backup_id, backup_name_for_logging, base_backup_name, backup_settings.internal, context->getProcessListElement(), BackupStatus::CREATING_BACKUP); /// Prepare context to use. ContextPtr context_in_use = context; @@ -408,8 +448,9 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context bool on_cluster = !backup_query->cluster.empty(); if (on_cluster || backup_settings.async) { - /// For ON CLUSTER queries we will need to change some settings. - /// For ASYNC queries we have to clone the context anyway. + /// We have to clone the query context here because: + /// if this is an "ON CLUSTER" query we need to change some settings, and + /// if this is an "ASYNC" query it's going to be executed in another thread. context_in_use = mutable_context = Context::createCopy(context); mutable_context->makeQueryContext(); } @@ -417,8 +458,22 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context if (backup_settings.async) { auto & thread_pool = getThreadPool(on_cluster ? ThreadPoolId::BACKUP_ASYNC_ON_CLUSTER : ThreadPoolId::BACKUP_ASYNC); + + /// process_list_element_holder is used to make an element in ProcessList live while BACKUP is working asynchronously. + auto process_list_element = context_in_use->getProcessListElement(); + thread_pool.scheduleOrThrowOnError( - [this, backup_query, backup_id, backup_name_for_logging, backup_info, backup_settings, backup_coordination, context_in_use, mutable_context] + [this, + backup_query, + backup_id, + backup_name_for_logging, + backup_info, + backup_settings, + backup_coordination, + context_in_use, + mutable_context, + thread_group = CurrentThread::getGroup(), + process_list_element_holder = process_list_element ? process_list_element->getProcessListEntry() : nullptr] { doBackup( backup_query, @@ -429,6 +484,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context backup_coordination, context_in_use, mutable_context, + thread_group, /* called_async= */ true); }); } @@ -443,6 +499,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context backup_coordination, context_in_use, mutable_context, + nullptr, /* called_async= */ false); } @@ -452,7 +509,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context { tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? 
"internal backup" : "backup"), backup_name_for_logging)); /// Something bad happened, the backup has not built. - setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED); + setStatusSafe(backup_id, getBackupStatusFromCurrentException()); sendCurrentExceptionToCoordination(backup_coordination); throw; } @@ -468,19 +525,22 @@ void BackupsWorker::doBackup( std::shared_ptr backup_coordination, const ContextPtr & context, ContextMutablePtr mutable_context, + ThreadGroupPtr thread_group, bool called_async) { - std::optional query_scope; + SCOPE_EXIT_SAFE( + if (called_async && thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + ); + try { + if (called_async && thread_group) + CurrentThread::attachToGroup(thread_group); if (called_async) - { - query_scope.emplace(mutable_context); setThreadName("BackupWorker"); - } bool on_cluster = !backup_query->cluster.empty(); - assert(mutable_context || (!on_cluster && !called_async)); /// Checks access rights if this is not ON CLUSTER query. @@ -557,8 +617,8 @@ void BackupsWorker::doBackup( } /// Write the backup entries to the backup. - buildFileInfosForBackupEntries(backup, backup_entries, backup_create_params.read_settings, backup_coordination); - writeBackupEntries(backup, std::move(backup_entries), backup_id, backup_coordination, backup_settings.internal); + buildFileInfosForBackupEntries(backup, backup_entries, backup_create_params.read_settings, backup_coordination, context->getProcessListElement()); + writeBackupEntries(backup, std::move(backup_entries), backup_id, backup_coordination, backup_settings.internal, context->getProcessListElement()); /// We have written our backup entries, we need to tell other hosts (they could be waiting for it). backup_coordination->setStage(Stage::COMPLETED,""); @@ -596,7 +656,7 @@ void BackupsWorker::doBackup( if (called_async) { tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? 
"internal backup" : "backup"), backup_name_for_logging)); - setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED); + setStatusSafe(backup_id, getBackupStatusFromCurrentException()); sendCurrentExceptionToCoordination(backup_coordination); } else @@ -608,15 +668,21 @@ void BackupsWorker::doBackup( } -void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr backup_coordination) +void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr backup_coordination, QueryStatusPtr process_list_element) { backup_coordination->setStage(Stage::BUILDING_FILE_INFOS, ""); backup_coordination->waitForStage(Stage::BUILDING_FILE_INFOS); - backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), read_settings, getThreadPool(ThreadPoolId::BACKUP_MAKE_FILES_LIST))); + backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), read_settings, getThreadPool(ThreadPoolId::BACKUP_MAKE_FILES_LIST), process_list_element)); } -void BackupsWorker::writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, const OperationID & backup_id, std::shared_ptr backup_coordination, bool internal) +void BackupsWorker::writeBackupEntries( + BackupMutablePtr backup, + BackupEntries && backup_entries, + const OperationID & backup_id, + std::shared_ptr backup_coordination, + bool internal, + QueryStatusPtr process_list_element) { LOG_TRACE(log, "{}, num backup entries={}", Stage::WRITING_BACKUP, backup_entries.size()); backup_coordination->setStage(Stage::WRITING_BACKUP, ""); @@ -677,7 +743,13 @@ void BackupsWorker::writeBackupEntries(BackupMutablePtr backup, BackupEntries && return; } + if (process_list_element) + process_list_element->checkTimeLimit(); + backup->writeFile(file_info, std::move(entry)); + + maybeSleepForTesting(); + // Update metadata if (!internal) { @@ -690,7 +762,6 @@ void BackupsWorker::writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup->getCompressedSize(), 0, 0); } - } catch (...) { @@ -752,15 +823,16 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt if (restore_settings.base_backup_info) base_backup_name = restore_settings.base_backup_info->toStringForLogging(); - addInfo(restore_id, backup_name_for_logging, base_backup_name, restore_settings.internal, BackupStatus::RESTORING); + addInfo(restore_id, backup_name_for_logging, base_backup_name, restore_settings.internal, context->getProcessListElement(), BackupStatus::RESTORING); /// Prepare context to use. ContextMutablePtr context_in_use = context; bool on_cluster = !restore_query->cluster.empty(); if (restore_settings.async || on_cluster) { - /// For ON CLUSTER queries we will need to change some settings. - /// For ASYNC queries we have to clone the context anyway. + /// We have to clone the query context here because: + /// if this is an "ON CLUSTER" query we need to change some settings, and + /// if this is an "ASYNC" query it's going to be executed in another thread. context_in_use = Context::createCopy(context); context_in_use->makeQueryContext(); } @@ -768,8 +840,21 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt if (restore_settings.async) { auto & thread_pool = getThreadPool(on_cluster ? 
ThreadPoolId::RESTORE_ASYNC_ON_CLUSTER : ThreadPoolId::RESTORE_ASYNC); + + /// process_list_element_holder is used to make an element in ProcessList live while RESTORE is working asynchronously. + auto process_list_element = context_in_use->getProcessListElement(); + thread_pool.scheduleOrThrowOnError( - [this, restore_query, restore_id, backup_name_for_logging, backup_info, restore_settings, restore_coordination, context_in_use] + [this, + restore_query, + restore_id, + backup_name_for_logging, + backup_info, + restore_settings, + restore_coordination, + context_in_use, + thread_group = CurrentThread::getGroup(), + process_list_element_holder = process_list_element ? process_list_element->getProcessListEntry() : nullptr] { doRestore( restore_query, @@ -779,6 +864,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt restore_settings, restore_coordination, context_in_use, + thread_group, /* called_async= */ true); }); } @@ -792,6 +878,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt restore_settings, restore_coordination, context_in_use, + nullptr, /* called_async= */ false); } @@ -800,7 +887,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt catch (...) { /// Something bad happened, the backup has not built. - setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED); + setStatusSafe(restore_id, getRestoreStatusFromCurrentException()); sendCurrentExceptionToCoordination(restore_coordination); throw; } @@ -815,16 +902,20 @@ void BackupsWorker::doRestore( RestoreSettings restore_settings, std::shared_ptr restore_coordination, ContextMutablePtr context, + ThreadGroupPtr thread_group, bool called_async) { - std::optional query_scope; + SCOPE_EXIT_SAFE( + if (called_async && thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + ); + try { + if (called_async && thread_group) + CurrentThread::attachToGroup(thread_group); if (called_async) - { - query_scope.emplace(context); setThreadName("RestoreWorker"); - } /// Open the backup for reading. BackupFactory::CreateParams backup_open_params; @@ -913,7 +1004,7 @@ void BackupsWorker::doRestore( } /// Execute the data restoring tasks. - restoreTablesData(restore_id, backup, std::move(data_restore_tasks), getThreadPool(ThreadPoolId::RESTORE_TABLES_DATA)); + restoreTablesData(restore_id, backup, std::move(data_restore_tasks), getThreadPool(ThreadPoolId::RESTORE_TABLES_DATA), context->getProcessListElement()); /// We have restored everything, we need to tell other hosts (they could be waiting for it). restore_coordination->setStage(Stage::COMPLETED, ""); @@ -928,7 +1019,7 @@ void BackupsWorker::doRestore( if (called_async) { tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? 
"internal backup" : "backup"), backup_name_for_logging)); - setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED); + setStatusSafe(restore_id, getRestoreStatusFromCurrentException()); sendCurrentExceptionToCoordination(restore_coordination); } else @@ -940,7 +1031,7 @@ void BackupsWorker::doRestore( } -void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool) +void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool, QueryStatusPtr process_list_element) { size_t num_active_jobs = 0; std::mutex mutex; @@ -980,7 +1071,13 @@ void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr return; } + if (process_list_element) + process_list_element->checkTimeLimit(); + std::move(task)(); + + maybeSleepForTesting(); + setNumFilesAndSize( restore_id, backup->getNumFiles(), @@ -1011,9 +1108,10 @@ void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr } -void BackupsWorker::addInfo(const OperationID & id, const String & name, const String & base_backup_name, bool internal, BackupStatus status) +void BackupsWorker::addInfo(const OperationID & id, const String & name, const String & base_backup_name, bool internal, QueryStatusPtr process_list_element, BackupStatus status) { - BackupOperationInfo info; + ExtendedOperationInfo extended_info; + auto & info = extended_info.info; info.id = id; info.name = name; info.base_backup_name = base_backup_name; @@ -1021,7 +1119,16 @@ void BackupsWorker::addInfo(const OperationID & id, const String & name, const S info.status = status; info.start_time = std::chrono::system_clock::now(); - if (isFinalStatus(status)) + bool is_final_status = isFinalStatus(status); + + if (process_list_element) + { + info.profile_counters = process_list_element->getInfo(/* get_thread_list= */ false, /* get_profile_events= */ true, /* get_settings= */ false).profile_counters; + if (!is_final_status) + extended_info.process_list_element = process_list_element; + } + + if (is_final_status) info.end_time = info.start_time; std::lock_guard lock{infos_mutex}; @@ -1030,7 +1137,7 @@ void BackupsWorker::addInfo(const OperationID & id, const String & name, const S if (it != infos.end()) { /// It's better not allow to overwrite the current status if it's in progress. 
- auto current_status = it->second.status; + auto current_status = it->second.info.status; if (!isFinalStatus(current_status)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot start a backup or restore: ID {} is already in use", id); } @@ -1038,7 +1145,7 @@ void BackupsWorker::addInfo(const OperationID & id, const String & name, const S if (backup_log) backup_log->add(BackupLogElement{info}); - infos[id] = std::move(info); + infos[id] = std::move(extended_info); num_active_backups += getNumActiveBackupsChange(status); num_active_restores += getNumActiveRestoresChange(status); @@ -1057,16 +1164,24 @@ void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw return; } - auto & info = it->second; + auto & extended_info = it->second; + auto & info = extended_info.info; + auto old_status = info.status; - info.status = status; - info.profile_counters = std::make_shared(CurrentThread::getProfileEvents().getPartiallyAtomicSnapshot()); + bool is_final_status = isFinalStatus(status); - if (isFinalStatus(status)) + if (extended_info.process_list_element) + { + info.profile_counters = extended_info.process_list_element->getInfo(/* get_thread_list= */ false, /* get_profile_events= */ true, /* get_settings= */ false).profile_counters; + if (is_final_status) + extended_info.process_list_element = nullptr; + } + + if (is_final_status) info.end_time = std::chrono::system_clock::now(); - if (isErrorStatus(status)) + if (isFailedOrCancelled(status)) { info.error_message = getCurrentExceptionMessage(false); info.exception = std::current_exception(); @@ -1077,6 +1192,9 @@ void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw num_active_backups += getNumActiveBackupsChange(status) - getNumActiveBackupsChange(old_status); num_active_restores += getNumActiveRestoresChange(status) - getNumActiveRestoresChange(old_status); + + if (status != old_status) + status_changed.notify_all(); } @@ -1090,7 +1208,7 @@ void BackupsWorker::setNumFilesAndSize(const OperationID & id, size_t num_files, if (it == infos.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id); - auto & info = it->second; + auto & info = it->second.info; info.num_files = num_files; info.total_size = total_size; info.num_entries = num_entries; @@ -1101,37 +1219,120 @@ void BackupsWorker::setNumFilesAndSize(const OperationID & id, size_t num_files, } -void BackupsWorker::wait(const OperationID & id, bool rethrow_exception) +void BackupsWorker::maybeSleepForTesting() const +{ + if (test_inject_sleep) + sleepForSeconds(1); +} + + +void BackupsWorker::wait(const OperationID & backup_or_restore_id, bool rethrow_exception) { std::unique_lock lock{infos_mutex}; status_changed.wait(lock, [&] { - auto it = infos.find(id); + auto it = infos.find(backup_or_restore_id); if (it == infos.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id); - const auto & info = it->second; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", backup_or_restore_id); + const auto & info = it->second.info; auto current_status = info.status; - if (rethrow_exception && isErrorStatus(current_status)) + if (rethrow_exception && isFailedOrCancelled(current_status)) std::rethrow_exception(info.exception); - return isFinalStatus(current_status); + if (isFinalStatus(current_status)) + return true; + LOG_INFO(log, "Waiting {} {}", isBackupStatus(info.status) ? 
"backup" : "restore", info.name); + return false; }); } +void BackupsWorker::waitAll() +{ + std::vector current_operations; + { + std::lock_guard lock{infos_mutex}; + for (const auto & [id, extended_info] : infos) + if (!isFinalStatus(extended_info.info.status)) + current_operations.push_back(id); + } + + if (current_operations.empty()) + return; + + LOG_INFO(log, "Waiting for running backups and restores to finish"); + + for (const auto & id : current_operations) + wait(id, /* rethrow_exception= */ false); + + LOG_INFO(log, "Backups and restores finished"); +} + +void BackupsWorker::cancel(const BackupOperationID & backup_or_restore_id, bool wait_) +{ + QueryStatusPtr process_list_element; + { + std::unique_lock lock{infos_mutex}; + auto it = infos.find(backup_or_restore_id); + if (it == infos.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", backup_or_restore_id); + + const auto & extended_info = it->second; + const auto & info = extended_info.info; + if (isFinalStatus(info.status) || !extended_info.process_list_element) + return; + + LOG_INFO(log, "Cancelling {} {}", isBackupStatus(info.status) ? "backup" : "restore", info.name); + process_list_element = extended_info.process_list_element; + } + + process_list.sendCancelToQuery(process_list_element); + + if (wait_) + wait(backup_or_restore_id, /* rethrow_exception= */ false); +} + + +void BackupsWorker::cancelAll(bool wait_) +{ + std::vector current_operations; + { + std::lock_guard lock{infos_mutex}; + for (const auto & [id, extended_info] : infos) + if (!isFinalStatus(extended_info.info.status)) + current_operations.push_back(id); + } + + if (current_operations.empty()) + return; + + LOG_INFO(log, "Cancelling running backups and restores"); + + for (const auto & id : current_operations) + cancel(id, /* wait= */ false); + + if (wait_) + for (const auto & id : current_operations) + wait(id, /* rethrow_exception= */ false); + + LOG_INFO(log, "Backups and restores finished or stopped"); +} + + BackupOperationInfo BackupsWorker::getInfo(const OperationID & id) const { std::lock_guard lock{infos_mutex}; auto it = infos.find(id); if (it == infos.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup ID {}", id); - return it->second; + return it->second.info; } std::vector BackupsWorker::getAllInfos() const { std::vector res_infos; std::lock_guard lock{infos_mutex}; - for (const auto & info : infos | boost::adaptors::map_values) + for (const auto & extended_info : infos | boost::adaptors::map_values) { + const auto & info = extended_info.info; if (!info.internal) res_infos.push_back(info); } @@ -1140,14 +1341,11 @@ std::vector BackupsWorker::getAllInfos() const void BackupsWorker::shutdown() { - bool has_active_backups_and_restores = (num_active_backups || num_active_restores); - if (has_active_backups_and_restores) - LOG_INFO(log, "Waiting for {} backups and {} restores to be finished", num_active_backups, num_active_restores); + /// Cancel running backups and restores. + cancelAll(/* wait= */ true); + /// Wait for our thread pools (it must be done before destroying them). 
thread_pools->wait(); - - if (has_active_backups_and_restores) - LOG_INFO(log, "All backup and restore tasks have finished"); } } diff --git a/src/Backups/BackupsWorker.h b/src/Backups/BackupsWorker.h index e2bd076314f..7a514e7032b 100644 --- a/src/Backups/BackupsWorker.h +++ b/src/Backups/BackupsWorker.h @@ -26,13 +26,26 @@ using BackupEntries = std::vector>; struct ReadSettings; class BackupLog; +class ThreadGroup; +using ThreadGroupPtr = std::shared_ptr; +class QueryStatus; +using QueryStatusPtr = std::shared_ptr; +class ProcessList; + /// Manager of backups and restores: executes backups and restores' threads in the background. /// Keeps information about backups and restores started in this session. class BackupsWorker { public: - BackupsWorker(ContextPtr global_context, size_t num_backup_threads, size_t num_restore_threads, bool allow_concurrent_backups_, bool allow_concurrent_restores_); + BackupsWorker( + ContextMutablePtr global_context, + size_t num_backup_threads, + size_t num_restore_threads, + bool allow_concurrent_backups_, + bool allow_concurrent_restores_, + bool test_inject_sleep_); + ~BackupsWorker(); /// Waits until all tasks have been completed. @@ -41,10 +54,20 @@ public: /// Starts executing a BACKUP or RESTORE query. Returns ID of the operation. BackupOperationID start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context); - /// Waits until a BACKUP or RESTORE query started by start() is finished. + /// Waits until the specified backup or restore operation finishes or stops. /// The function returns immediately if the operation is already finished. void wait(const BackupOperationID & backup_or_restore_id, bool rethrow_exception = true); + /// Waits until all running backup and restore operations finish or stop. + void waitAll(); + + /// Cancels the specified backup or restore operation. + /// The function does nothing if this operation has already finished. + void cancel(const BackupOperationID & backup_or_restore_id, bool wait_ = true); + + /// Cancels all running backup and restore operations. + void cancelAll(bool wait_ = true); + BackupOperationInfo getInfo(const BackupOperationID & id) const; std::vector getAllInfos() const; @@ -60,13 +83,14 @@ private: std::shared_ptr backup_coordination, const ContextPtr & context, ContextMutablePtr mutable_context, + ThreadGroupPtr thread_group, bool called_async); /// Builds file infos for specified backup entries. - void buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr backup_coordination); + void buildFileInfosForBackupEntries(const BackupPtr & backup, const BackupEntries & backup_entries, const ReadSettings & read_settings, std::shared_ptr backup_coordination, QueryStatusPtr process_list_element); /// Write backup entries to an opened backup. 
- void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, const BackupOperationID & backup_id, std::shared_ptr backup_coordination, bool internal); + void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, const BackupOperationID & backup_id, std::shared_ptr backup_coordination, bool internal, QueryStatusPtr process_list_element); BackupOperationID startRestoring(const ASTPtr & query, ContextMutablePtr context); @@ -78,12 +102,13 @@ private: RestoreSettings restore_settings, std::shared_ptr restore_coordination, ContextMutablePtr context, + ThreadGroupPtr thread_group, bool called_async); /// Run data restoring tasks which insert data to tables. - void restoreTablesData(const BackupOperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool); + void restoreTablesData(const BackupOperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool, QueryStatusPtr process_list_element); - void addInfo(const BackupOperationID & id, const String & name, const String & base_backup_name, bool internal, BackupStatus status); + void addInfo(const BackupOperationID & id, const String & name, const String & base_backup_name, bool internal, QueryStatusPtr process_list_element, BackupStatus status); void setStatus(const BackupOperationID & id, BackupStatus status, bool throw_if_error = true); void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); } void setNumFilesAndSize(const BackupOperationID & id, size_t num_files, UInt64 total_size, size_t num_entries, @@ -92,19 +117,33 @@ private: enum class ThreadPoolId; ThreadPool & getThreadPool(ThreadPoolId thread_pool_id); + /// Waits for some time if `test_inject_sleep` is true. 
+ void maybeSleepForTesting() const; + class ThreadPools; std::unique_ptr thread_pools; const bool allow_concurrent_backups; const bool allow_concurrent_restores; + const bool test_inject_sleep; + Poco::Logger * log; - std::unordered_map infos; - std::shared_ptr backup_log; + struct ExtendedOperationInfo + { + BackupOperationInfo info; + QueryStatusPtr process_list_element; /// to cancel this operation if we want to + }; + + std::unordered_map infos; + std::condition_variable status_changed; std::atomic num_active_backups = 0; std::atomic num_active_restores = 0; mutable std::mutex infos_mutex; + + std::shared_ptr backup_log; + ProcessList & process_list; }; } diff --git a/src/Backups/RestoreCoordinationRemote.cpp b/src/Backups/RestoreCoordinationRemote.cpp index 190634de4a9..0d2b3832bad 100644 --- a/src/Backups/RestoreCoordinationRemote.cpp +++ b/src/Backups/RestoreCoordinationRemote.cpp @@ -21,7 +21,8 @@ RestoreCoordinationRemote::RestoreCoordinationRemote( const String & restore_uuid_, const Strings & all_hosts_, const String & current_host_, - bool is_internal_) + bool is_internal_, + QueryStatusPtr process_list_element_) : get_zookeeper(get_zookeeper_) , root_zookeeper_path(root_zookeeper_path_) , keeper_settings(keeper_settings_) @@ -36,6 +37,7 @@ RestoreCoordinationRemote::RestoreCoordinationRemote( log, get_zookeeper_, keeper_settings, + process_list_element_, [my_zookeeper_path = zookeeper_path, my_current_host = current_host, my_is_internal = is_internal] (WithRetries::FaultyKeeper & zk) { diff --git a/src/Backups/RestoreCoordinationRemote.h b/src/Backups/RestoreCoordinationRemote.h index 5ff0e6de002..f7e678645df 100644 --- a/src/Backups/RestoreCoordinationRemote.h +++ b/src/Backups/RestoreCoordinationRemote.h @@ -21,7 +21,8 @@ public: const String & restore_uuid_, const Strings & all_hosts_, const String & current_host_, - bool is_internal_); + bool is_internal_, + QueryStatusPtr process_list_element_); ~RestoreCoordinationRemote() override; diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index a33773f19ab..f218410e599 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -85,6 +86,7 @@ RestorerFromBackup::RestorerFromBackup( , restore_coordination(restore_coordination_) , backup(backup_) , context(context_) + , process_list_element(context->getProcessListElement()) , on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000)) , create_table_timeout(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000)) , log(&Poco::Logger::get("RestorerFromBackup")) @@ -138,6 +140,8 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode) void RestorerFromBackup::setStage(const String & new_stage, const String & message) { LOG_TRACE(log, "Setting stage: {}", new_stage); + checkIsQueryCancelled(); + current_stage = new_stage; if (restore_coordination) @@ -150,6 +154,12 @@ void RestorerFromBackup::setStage(const String & new_stage, const String & messa } } +void RestorerFromBackup::checkIsQueryCancelled() const +{ + if (process_list_element) + process_list_element->checkTimeLimit(); +} + void RestorerFromBackup::findRootPathsInBackup() { size_t shard_num = 1; @@ -563,6 +573,8 @@ void RestorerFromBackup::createDatabase(const String & database_name) const if (database_info.is_predefined_database) return; + checkIsQueryCancelled(); + auto 
create_database_query = typeid_cast>(database_info.create_database_query->clone()); /// Generate a new UUID for a database. @@ -709,6 +721,8 @@ void RestorerFromBackup::createTable(const QualifiedTableName & table_name) if (table_info.is_predefined_table) return; + checkIsQueryCancelled(); + auto create_table_query = typeid_cast>(table_info.create_table_query->clone()); /// Generate a new UUID for a table (the same table on different hosts must use the same UUID, `restore_coordination` will make it so). @@ -790,6 +804,8 @@ void RestorerFromBackup::insertDataToTable(const QualifiedTableName & table_name auto & table_info = table_infos.at(table_name); auto storage = table_info.storage; + checkIsQueryCancelled(); + try { const auto & data_path_in_backup = table_info.data_path_in_backup; diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 194478bd8b4..fad79a3a2e6 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -21,6 +21,8 @@ using DatabasePtr = std::shared_ptr; class AccessRestorerFromBackup; struct IAccessEntity; using AccessEntityPtr = std::shared_ptr; +class QueryStatus; +using QueryStatusPtr = std::shared_ptr; /// Restores the definition of databases and tables and prepares tasks to restore the data of the tables. @@ -74,6 +76,7 @@ private: std::shared_ptr restore_coordination; BackupPtr backup; ContextMutablePtr context; + QueryStatusPtr process_list_element; std::chrono::milliseconds on_cluster_first_sync_timeout; std::chrono::milliseconds create_table_timeout; Poco::Logger * log; @@ -107,6 +110,9 @@ private: void setStage(const String & new_stage, const String & message = ""); + /// Throws an exception if the RESTORE query was cancelled. + void checkIsQueryCancelled() const; + struct DatabaseInfo { ASTPtr create_database_query; diff --git a/src/Backups/WithRetries.cpp b/src/Backups/WithRetries.cpp index 55809dc6958..db36bc92d92 100644 --- a/src/Backups/WithRetries.cpp +++ b/src/Backups/WithRetries.cpp @@ -21,10 +21,11 @@ WithRetries::KeeperSettings WithRetries::KeeperSettings::fromContext(ContextPtr } WithRetries::WithRetries( - Poco::Logger * log_, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings_, RenewerCallback callback_) + Poco::Logger * log_, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings_, QueryStatusPtr process_list_element_, RenewerCallback callback_) : log(log_) , get_zookeeper(get_zookeeper_) , settings(settings_) + , process_list_element(process_list_element_) , callback(callback_) , global_zookeeper_retries_info( settings.keeper_max_retries, settings.keeper_retry_initial_backoff_ms, settings.keeper_retry_max_backoff_ms) @@ -32,7 +33,7 @@ WithRetries::WithRetries( WithRetries::RetriesControlHolder::RetriesControlHolder(const WithRetries * parent, const String & name) : info(parent->global_zookeeper_retries_info) - , retries_ctl(name, parent->log, info, nullptr) + , retries_ctl(name, parent->log, info, parent->process_list_element) , faulty_zookeeper(parent->getFaultyZooKeeper()) {} diff --git a/src/Backups/WithRetries.h b/src/Backups/WithRetries.h index 8f4a730e6a1..edfccc658d9 100644 --- a/src/Backups/WithRetries.h +++ b/src/Backups/WithRetries.h @@ -52,7 +52,7 @@ public: }; RetriesControlHolder createRetriesControlHolder(const String & name); - WithRetries(Poco::Logger * log, zkutil::GetZooKeeper get_zookeeper_, const KeeperSettings & settings, RenewerCallback callback); + WithRetries(Poco::Logger * log, zkutil::GetZooKeeper get_zookeeper_, const 
KeeperSettings & settings, QueryStatusPtr process_list_element_, RenewerCallback callback); /// Used to re-establish new connection inside a retry loop. void renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const; @@ -65,6 +65,8 @@ private: Poco::Logger * log; zkutil::GetZooKeeper get_zookeeper; KeeperSettings settings; + QueryStatusPtr process_list_element; + /// This callback is called each time when a new [Zoo]Keeper session is created. /// In backups it is primarily used to re-create an ephemeral node to signal the coordinator /// that the host is alive and able to continue writing the backup. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 083b959c4b6..027e8a01fb1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -83,10 +83,11 @@ add_subdirectory (Formats) add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) +add_headers_and_sources(clickhouse_common_io Common/Scheduler) +add_headers_and_sources(clickhouse_common_io Common/Scheduler/Nodes) add_headers_and_sources(clickhouse_common_io Common/SSH) add_headers_and_sources(clickhouse_common_io IO) add_headers_and_sources(clickhouse_common_io IO/Archives) -add_headers_and_sources(clickhouse_common_io IO/Resource) add_headers_and_sources(clickhouse_common_io IO/S3) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 1da3065f38f..959b9b5a0e3 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -103,14 +103,19 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHost(host))) ? 
Protocol::Compression::Enable : Protocol::Compression::Disable; - timeouts = ConnectionTimeouts( - Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0), - Poco::Timespan(config.getInt("handshake_timeout_ms", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC * 1000), 0)); - - timeouts.sync_request_timeout = Poco::Timespan(config.getInt("sync_request_timeout", DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC), 0); + timeouts = ConnectionTimeouts() + .withConnectionTimeout( + Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0)) + .withSendTimeout( + Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)) + .withReceiveTimeout( + Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)) + .withTcpKeepAliveTimeout( + Poco::Timespan(config.getInt("tcp_keep_alive_timeout", DEFAULT_TCP_KEEP_ALIVE_TIMEOUT), 0)) + .withHandshakeTimeout( + Poco::Timespan(config.getInt("handshake_timeout_ms", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC * 1000) * 1000)) + .withSyncRequestTimeout( + Poco::Timespan(config.getInt("sync_request_timeout", DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC), 0)); } ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config, diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 4406114a955..43166659b18 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -118,18 +118,18 @@ ConnectionPoolWithFailover::Status ConnectionPoolWithFailover::getStatus() const return result; } -std::vector ConnectionPoolWithFailover::getMany(const ConnectionTimeouts & timeouts, - const Settings & settings, - PoolMode pool_mode, - AsyncCallback async_callback, - std::optional skip_unavailable_endpoints) +std::vector ConnectionPoolWithFailover::getMany( + const ConnectionTimeouts & timeouts, + const Settings & settings, + PoolMode pool_mode, + AsyncCallback async_callback, + std::optional skip_unavailable_endpoints, + GetPriorityForLoadBalancing::Func priority_func) { TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message) - { - return tryGetEntry(pool, timeouts, fail_message, settings, nullptr, async_callback); - }; + { return tryGetEntry(pool, timeouts, fail_message, settings, nullptr, async_callback); }; - std::vector results = getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints); + std::vector results = getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints, priority_func); std::vector entries; entries.reserve(results.size()); @@ -153,17 +153,17 @@ std::vector ConnectionPoolWithFailover::g std::vector ConnectionPoolWithFailover::getManyChecked( const ConnectionTimeouts & timeouts, - const Settings & settings, PoolMode pool_mode, + const Settings & settings, + PoolMode pool_mode, const QualifiedTableName & table_to_check, AsyncCallback async_callback, - std::optional skip_unavailable_endpoints) + std::optional skip_unavailable_endpoints, + GetPriorityForLoadBalancing::Func priority_func) { TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message) - { - return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, async_callback); - }; + { return 
tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, async_callback); }; - return getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints); + return getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints, priority_func); } ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings & settings) @@ -175,14 +175,16 @@ ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::ma } std::vector ConnectionPoolWithFailover::getManyImpl( - const Settings & settings, - PoolMode pool_mode, - const TryGetEntryFunc & try_get_entry, - std::optional skip_unavailable_endpoints) + const Settings & settings, + PoolMode pool_mode, + const TryGetEntryFunc & try_get_entry, + std::optional skip_unavailable_endpoints, + GetPriorityForLoadBalancing::Func priority_func) { if (nested_pools.empty()) - throw DB::Exception(DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED, - "Cannot get connection from ConnectionPoolWithFailover cause nested pools are empty"); + throw DB::Exception( + DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED, + "Cannot get connection from ConnectionPoolWithFailover cause nested pools are empty"); if (!skip_unavailable_endpoints.has_value()) skip_unavailable_endpoints = settings.skip_unavailable_shards; @@ -203,14 +205,13 @@ std::vector ConnectionPoolWithFailover::g else throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode"); - GetPriorityFunc get_priority = makeGetPriorityFunc(settings); + if (!priority_func) + priority_func = makeGetPriorityFunc(settings); UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value; bool fallback_to_stale_replicas = settings.fallback_to_stale_replicas_for_distributed_queries.value; - return Base::getMany(min_entries, max_entries, max_tries, - max_ignored_errors, fallback_to_stale_replicas, - try_get_entry, get_priority); + return Base::getMany(min_entries, max_entries, max_tries, max_ignored_errors, fallback_to_stale_replicas, try_get_entry, priority_func); } ConnectionPoolWithFailover::TryResult @@ -251,11 +252,14 @@ ConnectionPoolWithFailover::tryGetEntry( return result; } -std::vector ConnectionPoolWithFailover::getShuffledPools(const Settings & settings) +std::vector +ConnectionPoolWithFailover::getShuffledPools(const Settings & settings, GetPriorityForLoadBalancing::Func priority_func) { - GetPriorityFunc get_priority = makeGetPriorityFunc(settings); + if (!priority_func) + priority_func = makeGetPriorityFunc(settings); + UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value; - return Base::getShuffledPools(max_ignored_errors, get_priority); + return Base::getShuffledPools(max_ignored_errors, priority_func); } } diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 208a003edb8..eaef717a2d6 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -54,10 +54,13 @@ public: /** Allocates up to the specified number of connections to work. * Connections provide access to different replicas of one shard. 
*/ - std::vector getMany(const ConnectionTimeouts & timeouts, - const Settings & settings, PoolMode pool_mode, - AsyncCallback async_callback = {}, - std::optional skip_unavailable_endpoints = std::nullopt); + std::vector getMany( + const ConnectionTimeouts & timeouts, + const Settings & settings, + PoolMode pool_mode, + AsyncCallback async_callback = {}, + std::optional skip_unavailable_endpoints = std::nullopt, + GetPriorityForLoadBalancing::Func priority_func = {}); /// The same as getMany(), but return std::vector. std::vector getManyForTableFunction(const ConnectionTimeouts & timeouts, @@ -69,12 +72,13 @@ public: /// The same as getMany(), but check that replication delay for table_to_check is acceptable. /// Delay threshold is taken from settings. std::vector getManyChecked( - const ConnectionTimeouts & timeouts, - const Settings & settings, - PoolMode pool_mode, - const QualifiedTableName & table_to_check, - AsyncCallback async_callback = {}, - std::optional skip_unavailable_endpoints = std::nullopt); + const ConnectionTimeouts & timeouts, + const Settings & settings, + PoolMode pool_mode, + const QualifiedTableName & table_to_check, + AsyncCallback async_callback = {}, + std::optional skip_unavailable_endpoints = std::nullopt, + GetPriorityForLoadBalancing::Func priority_func = {}); struct NestedPoolStatus { @@ -87,7 +91,7 @@ public: using Status = std::vector; Status getStatus() const; - std::vector getShuffledPools(const Settings & settings); + std::vector getShuffledPools(const Settings & settings, GetPriorityFunc priority_func = {}); size_t getMaxErrorCup() const { return Base::max_error_cap; } @@ -96,13 +100,16 @@ public: Base::updateSharedErrorCounts(shuffled_pools); } + size_t getPoolSize() const { return Base::getPoolSize(); } + private: /// Get the values of relevant settings and call Base::getMany() std::vector getManyImpl( - const Settings & settings, - PoolMode pool_mode, - const TryGetEntryFunc & try_get_entry, - std::optional skip_unavailable_endpoints = std::nullopt); + const Settings & settings, + PoolMode pool_mode, + const TryGetEntryFunc & try_get_entry, + std::optional skip_unavailable_endpoints = std::nullopt, + GetPriorityForLoadBalancing::Func priority_func = {}); /// Try to get a connection from the pool and check that it is good. 
/// If table_to_check is not null and the check is enabled in settings, check that replication delay @@ -115,7 +122,7 @@ private: const QualifiedTableName * table_to_check = nullptr, AsyncCallback async_callback = {}); - GetPriorityFunc makeGetPriorityFunc(const Settings & settings); + GetPriorityForLoadBalancing::Func makeGetPriorityFunc(const Settings & settings); GetPriorityForLoadBalancing get_priority_load_balancing; }; diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index 0c69d7712ea..7ea13a7dffc 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -28,16 +28,18 @@ HedgedConnections::HedgedConnections( const ThrottlerPtr & throttler_, PoolMode pool_mode, std::shared_ptr table_to_check_, - AsyncCallback async_callback) + AsyncCallback async_callback, + GetPriorityForLoadBalancing::Func priority_func) : hedged_connections_factory( - pool_, - context_->getSettingsRef(), - timeouts_, - context_->getSettingsRef().connections_with_failover_max_tries.value, - context_->getSettingsRef().fallback_to_stale_replicas_for_distributed_queries.value, - context_->getSettingsRef().max_parallel_replicas.value, - context_->getSettingsRef().skip_unavailable_shards.value, - table_to_check_) + pool_, + context_->getSettingsRef(), + timeouts_, + context_->getSettingsRef().connections_with_failover_max_tries.value, + context_->getSettingsRef().fallback_to_stale_replicas_for_distributed_queries.value, + context_->getSettingsRef().max_parallel_replicas.value, + context_->getSettingsRef().skip_unavailable_shards.value, + table_to_check_, + priority_func) , context(std::move(context_)) , settings(context->getSettingsRef()) , throttler(throttler_) diff --git a/src/Client/HedgedConnections.h b/src/Client/HedgedConnections.h index ccdc59965e2..5bc274332db 100644 --- a/src/Client/HedgedConnections.h +++ b/src/Client/HedgedConnections.h @@ -70,13 +70,15 @@ public: size_t index; }; - HedgedConnections(const ConnectionPoolWithFailoverPtr & pool_, - ContextPtr context_, - const ConnectionTimeouts & timeouts_, - const ThrottlerPtr & throttler, - PoolMode pool_mode, - std::shared_ptr table_to_check_ = nullptr, - AsyncCallback async_callback = {}); + HedgedConnections( + const ConnectionPoolWithFailoverPtr & pool_, + ContextPtr context_, + const ConnectionTimeouts & timeouts_, + const ThrottlerPtr & throttler, + PoolMode pool_mode, + std::shared_ptr table_to_check_ = nullptr, + AsyncCallback async_callback = {}, + GetPriorityForLoadBalancing::Func priority_func = {}); void sendScalarsData(Scalars & data) override; diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 6ac504772e2..f7b5ceedc96 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -29,7 +29,8 @@ HedgedConnectionsFactory::HedgedConnectionsFactory( bool fallback_to_stale_replicas_, UInt64 max_parallel_replicas_, bool skip_unavailable_shards_, - std::shared_ptr table_to_check_) + std::shared_ptr table_to_check_, + GetPriorityForLoadBalancing::Func priority_func) : pool(pool_) , timeouts(timeouts_) , table_to_check(table_to_check_) @@ -39,7 +40,7 @@ HedgedConnectionsFactory::HedgedConnectionsFactory( , max_parallel_replicas(max_parallel_replicas_) , skip_unavailable_shards(skip_unavailable_shards_) { - shuffled_pools = pool->getShuffledPools(settings_); + shuffled_pools = pool->getShuffledPools(settings_, priority_func); for (auto shuffled_pool : shuffled_pools) 
replicas.emplace_back(std::make_unique(shuffled_pool.pool, &timeouts, settings_, log, table_to_check.get())); } @@ -323,8 +324,7 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect else { ShuffledPool & shuffled_pool = shuffled_pools[index]; - LOG_WARNING( - log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); + LOG_INFO(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry); shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1); diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index e41ac9767a5..f187e9b2abb 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -53,7 +53,8 @@ public: bool fallback_to_stale_replicas_, UInt64 max_parallel_replicas_, bool skip_unavailable_shards_, - std::shared_ptr table_to_check_ = nullptr); + std::shared_ptr table_to_check_ = nullptr, + GetPriorityForLoadBalancing::Func priority_func = {}); /// Create and return active connections according to pool_mode. std::vector getManyConnections(PoolMode pool_mode, AsyncCallback async_callback = {}); diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index e32297b0898..69363a7c54f 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -575,30 +575,24 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti // Update dependent jobs for (const auto & dpt : dependent) { - if (auto dpt_info = scheduled_jobs.find(dpt); dpt_info != scheduled_jobs.end()) - { - dpt_info->second.dependencies_left--; - if (!dpt_info->second.isBlocked()) - enqueue(dpt_info->second, dpt, lock); + auto dpt_info = scheduled_jobs.find(dpt); + if (dpt_info == scheduled_jobs.end()) + continue; + dpt_info->second.dependencies_left--; + if (!dpt_info->second.isBlocked()) + enqueue(dpt_info->second, dpt, lock); - if (status != LoadStatus::OK) - { - std::exception_ptr cancel; - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - if (dpt->dependency_failure) - dpt->dependency_failure(dpt, job, cancel); - }); - // Recurse into dependent job if it should be canceled - if (cancel) - finish(dpt, LoadStatus::CANCELED, cancel, lock); - } - } - else + if (status != LoadStatus::OK) { - // Job has already been canceled. Do not enter twice into the same job during finish recursion. - // This happens in {A<-B; A<-C; B<-D; C<-D} graph for D if A is failed or canceled. 
- chassert(status == LoadStatus::CANCELED); + std::exception_ptr cancel; + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + if (dpt->dependency_failure) + dpt->dependency_failure(dpt, job, cancel); + }); + // Recurse into dependent job if it should be canceled + if (cancel) + finish(dpt, LoadStatus::CANCELED, cancel, lock); } } diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index bc00e047a88..d4c6f89ff92 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -9,7 +9,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -std::function GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const +GetPriorityForLoadBalancing::Func +GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const { std::function get_priority; switch (load_balance) @@ -33,19 +34,26 @@ std::function GetPriorityForLoadBalancing::getPriorityFu get_priority = [offset](size_t i) { return i != offset ? Priority{1} : Priority{0}; }; break; case LoadBalancing::ROUND_ROBIN: - if (last_used >= pool_size) - last_used = 0; + auto local_last_used = last_used % pool_size; ++last_used; - /* Consider pool_size equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 4 0 1 2 3 - * last_used = 3 -> get_priority: 4 3 0 1 2 - * ... - * */ - get_priority = [this, pool_size](size_t i) + + // Example: pool_size = 5 + // | local_last_used | i=0 | i=1 | i=2 | i=3 | i=4 | + // | 0 | 4 | 0 | 1 | 2 | 3 | + // | 1 | 3 | 4 | 0 | 1 | 2 | + // | 2 | 2 | 3 | 4 | 0 | 1 | + // | 3 | 1 | 2 | 3 | 4 | 0 | + // | 4 | 0 | 1 | 2 | 3 | 4 | + + get_priority = [pool_size, local_last_used](size_t i) { - ++i; // To make `i` indexing start with 1 instead of 0 as `last_used` does - return Priority{static_cast(i < last_used ? pool_size - i : i - last_used)}; + size_t priority = pool_size - 1; + if (i < local_last_used) + priority = pool_size - 1 - (local_last_used - i); + if (i > local_last_used) + priority = i - local_last_used - 1; + + return Priority{static_cast(priority)}; }; break; } diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index c60d180eca0..0de99730977 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -8,7 +8,12 @@ namespace DB class GetPriorityForLoadBalancing { public: - explicit GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {} + using Func = std::function; + + explicit GetPriorityForLoadBalancing(LoadBalancing load_balancing_, size_t last_used_ = 0) + : load_balancing(load_balancing_), last_used(last_used_) + { + } GetPriorityForLoadBalancing() = default; bool operator == (const GetPriorityForLoadBalancing & other) const @@ -23,7 +28,7 @@ public: return !(*this == other); } - std::function getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; + Func getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; std::vector hostname_prefix_distance; /// Prefix distances from name of this host to the names of hosts of pools. std::vector hostname_levenshtein_distance; /// Levenshtein Distances from name of this host to the names of hosts of pools. 
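Editorial note: the rewritten ROUND_ROBIN branch in GetPriorityForLoadBalancing.cpp above replaces the "reset last_used when it reaches pool_size" logic with a captured local_last_used taken modulo pool_size, so the lambda no longer reads shared state. The following is a minimal standalone sketch (plain size_t values stand in for the Priority wrapper, so none of these names are the actual ClickHouse types) that reproduces the priority table given in the new comment:

#include <cstddef>
#include <cstdio>

int main()
{
    const size_t pool_size = 5;
    for (size_t local_last_used = 0; local_last_used < pool_size; ++local_last_used)
    {
        // Same arithmetic as the lambda in the hunk above, with size_t instead of Priority.
        auto priority_of = [pool_size, local_last_used](size_t i)
        {
            size_t priority = pool_size - 1;          // i == local_last_used -> lowest preference
            if (i < local_last_used)
                priority = pool_size - 1 - (local_last_used - i);
            if (i > local_last_used)
                priority = i - local_last_used - 1;   // the next pool after local_last_used gets 0
            return priority;
        };

        std::printf("local_last_used=%zu:", local_last_used);
        for (size_t i = 0; i < pool_size; ++i)
            std::printf(" %zu", priority_of(i));      // matches the table in the code comment
        std::printf("\n");
    }
    return 0;
}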
diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 543a39fbc39..f960d551996 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -124,7 +124,9 @@ public: size_t max_ignored_errors, bool fallback_to_stale_replicas, const TryGetEntryFunc & try_get_entry, - const GetPriorityFunc & get_priority = GetPriorityFunc()); + const GetPriorityFunc & get_priority); + + size_t getPoolSize() const { return nested_pools.size(); } protected: @@ -147,7 +149,7 @@ protected: return std::make_tuple(shared_pool_states, nested_pools, last_error_decrease_time); } - NestedPools nested_pools; + const NestedPools nested_pools; const time_t decrease_error_period; const size_t max_error_cap; diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp index 0650637d9e8..7342933beff 100644 --- a/src/Common/RemoteProxyConfigurationResolver.cpp +++ b/src/Common/RemoteProxyConfigurationResolver.cpp @@ -50,11 +50,10 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve() /// 1 second is enough for now. /// TODO: Make timeouts configurable. - ConnectionTimeouts timeouts( - Poco::Timespan(1000000), /// Connection timeout. - Poco::Timespan(1000000), /// Send timeout. - Poco::Timespan(1000000) /// Receive timeout. - ); + auto timeouts = ConnectionTimeouts() + .withConnectionTimeout(1) + .withSendTimeout(1) + .withReceiveTimeout(1); try { diff --git a/src/IO/IResourceManager.h b/src/Common/Scheduler/IResourceManager.h similarity index 97% rename from src/IO/IResourceManager.h rename to src/Common/Scheduler/IResourceManager.h index 39fede0e19f..98fd3a9e5af 100644 --- a/src/IO/IResourceManager.h +++ b/src/Common/Scheduler/IResourceManager.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/src/IO/ISchedulerConstraint.h b/src/Common/Scheduler/ISchedulerConstraint.h similarity index 98% rename from src/IO/ISchedulerConstraint.h rename to src/Common/Scheduler/ISchedulerConstraint.h index 05bed7c3df8..a976206de74 100644 --- a/src/IO/ISchedulerConstraint.h +++ b/src/Common/Scheduler/ISchedulerConstraint.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { diff --git a/src/IO/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h similarity index 99% rename from src/IO/ISchedulerNode.h rename to src/Common/Scheduler/ISchedulerNode.h index e6e6bacbf9e..804026d7bf4 100644 --- a/src/IO/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/ISchedulerQueue.h b/src/Common/Scheduler/ISchedulerQueue.h similarity index 93% rename from src/IO/ISchedulerQueue.h rename to src/Common/Scheduler/ISchedulerQueue.h index 2e190a529e3..cbe63bd304a 100644 --- a/src/IO/ISchedulerQueue.h +++ b/src/Common/Scheduler/ISchedulerQueue.h @@ -1,8 +1,8 @@ #pragma once -#include -#include -#include +#include +#include +#include #include diff --git a/src/IO/Resource/ClassifiersConfig.cpp b/src/Common/Scheduler/Nodes/ClassifiersConfig.cpp similarity index 95% rename from src/IO/Resource/ClassifiersConfig.cpp rename to src/Common/Scheduler/Nodes/ClassifiersConfig.cpp index 7dc4d517138..192f97645a0 100644 --- a/src/IO/Resource/ClassifiersConfig.cpp +++ b/src/Common/Scheduler/Nodes/ClassifiersConfig.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/src/IO/Resource/ClassifiersConfig.h b/src/Common/Scheduler/Nodes/ClassifiersConfig.h similarity index 100% rename from 
src/IO/Resource/ClassifiersConfig.h rename to src/Common/Scheduler/Nodes/ClassifiersConfig.h diff --git a/src/IO/Resource/DynamicResourceManager.cpp b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp similarity index 97% rename from src/IO/Resource/DynamicResourceManager.cpp rename to src/Common/Scheduler/Nodes/DynamicResourceManager.cpp index b9803d8079d..3ab85c81428 100644 --- a/src/IO/Resource/DynamicResourceManager.cpp +++ b/src/Common/Scheduler/Nodes/DynamicResourceManager.cpp @@ -1,8 +1,8 @@ -#include +#include -#include -#include -#include +#include +#include +#include #include #include diff --git a/src/IO/Resource/DynamicResourceManager.h b/src/Common/Scheduler/Nodes/DynamicResourceManager.h similarity index 95% rename from src/IO/Resource/DynamicResourceManager.h rename to src/Common/Scheduler/Nodes/DynamicResourceManager.h index 3372d40a285..ff736693664 100644 --- a/src/IO/Resource/DynamicResourceManager.h +++ b/src/Common/Scheduler/Nodes/DynamicResourceManager.h @@ -1,8 +1,8 @@ #pragma once -#include -#include -#include +#include +#include +#include #include diff --git a/src/IO/Resource/FairPolicy.cpp b/src/Common/Scheduler/Nodes/FairPolicy.cpp similarity index 55% rename from src/IO/Resource/FairPolicy.cpp rename to src/Common/Scheduler/Nodes/FairPolicy.cpp index 248ff04cbd7..84e8b3c4bbd 100644 --- a/src/IO/Resource/FairPolicy.cpp +++ b/src/Common/Scheduler/Nodes/FairPolicy.cpp @@ -1,6 +1,6 @@ -#include +#include -#include +#include namespace DB { diff --git a/src/IO/Resource/FairPolicy.h b/src/Common/Scheduler/Nodes/FairPolicy.h similarity index 99% rename from src/IO/Resource/FairPolicy.h rename to src/Common/Scheduler/Nodes/FairPolicy.h index a8608b5bea3..c0e187e6fa9 100644 --- a/src/IO/Resource/FairPolicy.h +++ b/src/Common/Scheduler/Nodes/FairPolicy.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/src/IO/Resource/FifoQueue.cpp b/src/Common/Scheduler/Nodes/FifoQueue.cpp similarity index 55% rename from src/IO/Resource/FifoQueue.cpp rename to src/Common/Scheduler/Nodes/FifoQueue.cpp index f4b0e9c3328..f096a1aff93 100644 --- a/src/IO/Resource/FifoQueue.cpp +++ b/src/Common/Scheduler/Nodes/FifoQueue.cpp @@ -1,6 +1,6 @@ -#include +#include -#include +#include namespace DB { diff --git a/src/IO/Resource/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h similarity index 98% rename from src/IO/Resource/FifoQueue.h rename to src/Common/Scheduler/Nodes/FifoQueue.h index e95e817719a..38ae902bc2f 100644 --- a/src/IO/Resource/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -2,7 +2,7 @@ #include -#include +#include #include diff --git a/src/IO/Resource/PriorityPolicy.cpp b/src/Common/Scheduler/Nodes/PriorityPolicy.cpp similarity index 56% rename from src/IO/Resource/PriorityPolicy.cpp rename to src/Common/Scheduler/Nodes/PriorityPolicy.cpp index bee9a6d5dde..f4bdc1cddb7 100644 --- a/src/IO/Resource/PriorityPolicy.cpp +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.cpp @@ -1,6 +1,6 @@ -#include +#include -#include +#include namespace DB { diff --git a/src/IO/Resource/PriorityPolicy.h b/src/Common/Scheduler/Nodes/PriorityPolicy.h similarity index 98% rename from src/IO/Resource/PriorityPolicy.h rename to src/Common/Scheduler/Nodes/PriorityPolicy.h index ee34c38f7e5..6d6b15bd063 100644 --- a/src/IO/Resource/PriorityPolicy.h +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/IO/SchedulerNodeFactory.h b/src/Common/Scheduler/Nodes/SchedulerNodeFactory.h similarity 
index 97% rename from src/IO/SchedulerNodeFactory.h rename to src/Common/Scheduler/Nodes/SchedulerNodeFactory.h index 5c31534a9b8..e69fd21e139 100644 --- a/src/IO/SchedulerNodeFactory.h +++ b/src/Common/Scheduler/Nodes/SchedulerNodeFactory.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include diff --git a/src/IO/Resource/SemaphoreConstraint.cpp b/src/Common/Scheduler/Nodes/SemaphoreConstraint.cpp similarity index 58% rename from src/IO/Resource/SemaphoreConstraint.cpp rename to src/Common/Scheduler/Nodes/SemaphoreConstraint.cpp index 2135fd65a84..cc55d35df0a 100644 --- a/src/IO/Resource/SemaphoreConstraint.cpp +++ b/src/Common/Scheduler/Nodes/SemaphoreConstraint.cpp @@ -1,6 +1,6 @@ -#include +#include -#include +#include namespace DB { diff --git a/src/IO/Resource/SemaphoreConstraint.h b/src/Common/Scheduler/Nodes/SemaphoreConstraint.h similarity index 98% rename from src/IO/Resource/SemaphoreConstraint.h rename to src/Common/Scheduler/Nodes/SemaphoreConstraint.h index f0053ded0af..49677f40f26 100644 --- a/src/IO/Resource/SemaphoreConstraint.h +++ b/src/Common/Scheduler/Nodes/SemaphoreConstraint.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/IO/Resource/ThrottlerConstraint.cpp b/src/Common/Scheduler/Nodes/ThrottlerConstraint.cpp similarity index 58% rename from src/IO/Resource/ThrottlerConstraint.cpp rename to src/Common/Scheduler/Nodes/ThrottlerConstraint.cpp index 0ced2b1ab3e..d31c0f7e2a9 100644 --- a/src/IO/Resource/ThrottlerConstraint.cpp +++ b/src/Common/Scheduler/Nodes/ThrottlerConstraint.cpp @@ -1,6 +1,6 @@ -#include +#include -#include +#include namespace DB { diff --git a/src/IO/Resource/ThrottlerConstraint.h b/src/Common/Scheduler/Nodes/ThrottlerConstraint.h similarity index 99% rename from src/IO/Resource/ThrottlerConstraint.h rename to src/Common/Scheduler/Nodes/ThrottlerConstraint.h index 7de53fe0728..f061ff015e5 100644 --- a/src/IO/Resource/ThrottlerConstraint.h +++ b/src/Common/Scheduler/Nodes/ThrottlerConstraint.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/IO/Resource/registerResourceManagers.cpp b/src/Common/Scheduler/Nodes/registerResourceManagers.cpp similarity index 53% rename from src/IO/Resource/registerResourceManagers.cpp rename to src/Common/Scheduler/Nodes/registerResourceManagers.cpp index 5217bcdfbec..c5d5ba5b981 100644 --- a/src/IO/Resource/registerResourceManagers.cpp +++ b/src/Common/Scheduler/Nodes/registerResourceManagers.cpp @@ -1,17 +1,15 @@ -#include -#include +#include +#include namespace DB { void registerDynamicResourceManager(ResourceManagerFactory &); -void registerStaticResourceManager(ResourceManagerFactory &); void registerResourceManagers() { auto & factory = ResourceManagerFactory::instance(); registerDynamicResourceManager(factory); - registerStaticResourceManager(factory); } } diff --git a/src/IO/Resource/registerResourceManagers.h b/src/Common/Scheduler/Nodes/registerResourceManagers.h similarity index 100% rename from src/IO/Resource/registerResourceManagers.h rename to src/Common/Scheduler/Nodes/registerResourceManagers.h diff --git a/src/IO/Resource/registerSchedulerNodes.cpp b/src/Common/Scheduler/Nodes/registerSchedulerNodes.cpp similarity index 75% rename from src/IO/Resource/registerSchedulerNodes.cpp rename to src/Common/Scheduler/Nodes/registerSchedulerNodes.cpp index 612885e26b7..1c2b645607b 100644 --- a/src/IO/Resource/registerSchedulerNodes.cpp +++ b/src/Common/Scheduler/Nodes/registerSchedulerNodes.cpp @@ -1,8 +1,8 @@ -#include +#include 
-#include -#include -#include +#include +#include +#include namespace DB { diff --git a/src/IO/Resource/registerSchedulerNodes.h b/src/Common/Scheduler/Nodes/registerSchedulerNodes.h similarity index 100% rename from src/IO/Resource/registerSchedulerNodes.h rename to src/Common/Scheduler/Nodes/registerSchedulerNodes.h diff --git a/src/IO/Resource/tests/ResourceTest.h b/src/Common/Scheduler/Nodes/tests/ResourceTest.h similarity index 95% rename from src/IO/Resource/tests/ResourceTest.h rename to src/Common/Scheduler/Nodes/tests/ResourceTest.h index 072e2dda47c..9ddd498ed61 100644 --- a/src/IO/Resource/tests/ResourceTest.h +++ b/src/Common/Scheduler/Nodes/tests/ResourceTest.h @@ -1,14 +1,14 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include diff --git a/src/IO/Resource/tests/gtest_resource_manager_hierarchical.cpp b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp similarity index 94% rename from src/IO/Resource/tests/gtest_resource_manager_hierarchical.cpp rename to src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp index 949a1ee0264..961a3b6f713 100644 --- a/src/IO/Resource/tests/gtest_resource_manager_hierarchical.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp @@ -1,8 +1,8 @@ #include -#include +#include -#include +#include #include using namespace DB; @@ -10,7 +10,7 @@ using namespace DB; using ResourceTest = ResourceTestManager; using TestGuard = ResourceTest::Guard; -TEST(IOResourceDynamicResourceManager, Smoke) +TEST(SchedulerDynamicResourceManager, Smoke) { ResourceTest t; @@ -45,7 +45,7 @@ TEST(IOResourceDynamicResourceManager, Smoke) } } -TEST(IOResourceDynamicResourceManager, Fairness) +TEST(SchedulerDynamicResourceManager, Fairness) { // Total cost for A and B cannot differ for more than 1 (every request has cost equal to 1). // Requests from A use `value = 1` and from B `value = -1` is used. 
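Editorial note: the comment at the end of the renamed SchedulerDynamicResourceManager.Fairness test above describes its accounting scheme: every request has cost 1, requests from A are tallied as +1 and requests from B as -1, so under a fair schedule the running sum never drifts outside [-1, 1]. A tiny illustration of that invariant (this is only the arithmetic from the comment, not the gtest itself):

#include <cassert>
#include <cstdio>
#include <string>

int main()
{
    // A fair scheduler strictly interleaves the two clients; an unfair one
    // would let the running balance drift past +1 or below -1.
    std::string schedule = "ABABABAB";
    int balance = 0;  // (+1 per A request) + (-1 per B request), as in the test comment
    for (char c : schedule)
    {
        balance += (c == 'A') ? 1 : -1;
        assert(-1 <= balance && balance <= 1);
    }
    std::printf("total cost of A and B never differed by more than 1\n");
    return 0;
}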
diff --git a/src/IO/Resource/tests/gtest_resource_class_fair.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_class_fair.cpp similarity index 93% rename from src/IO/Resource/tests/gtest_resource_class_fair.cpp rename to src/Common/Scheduler/Nodes/tests/gtest_resource_class_fair.cpp index 89ec2ac7c32..4f0e8c80734 100644 --- a/src/IO/Resource/tests/gtest_resource_class_fair.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_class_fair.cpp @@ -1,14 +1,14 @@ #include -#include +#include -#include +#include using namespace DB; using ResourceTest = ResourceTestClass; -TEST(IOResourceFairPolicy, Factory) +TEST(SchedulerFairPolicy, Factory) { ResourceTest t; @@ -17,7 +17,7 @@ TEST(IOResourceFairPolicy, Factory) EXPECT_TRUE(dynamic_cast(fair.get()) != nullptr); } -TEST(IOResourceFairPolicy, FairnessWeights) +TEST(SchedulerFairPolicy, FairnessWeights) { ResourceTest t; @@ -41,7 +41,7 @@ TEST(IOResourceFairPolicy, FairnessWeights) t.consumed("B", 20); } -TEST(IOResourceFairPolicy, Activation) +TEST(SchedulerFairPolicy, Activation) { ResourceTest t; @@ -77,7 +77,7 @@ TEST(IOResourceFairPolicy, Activation) t.consumed("B", 10); } -TEST(IOResourceFairPolicy, FairnessMaxMin) +TEST(SchedulerFairPolicy, FairnessMaxMin) { ResourceTest t; @@ -101,7 +101,7 @@ TEST(IOResourceFairPolicy, FairnessMaxMin) t.consumed("A", 20); } -TEST(IOResourceFairPolicy, HierarchicalFairness) +TEST(SchedulerFairPolicy, HierarchicalFairness) { ResourceTest t; diff --git a/src/IO/Resource/tests/gtest_resource_class_priority.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_class_priority.cpp similarity index 89% rename from src/IO/Resource/tests/gtest_resource_class_priority.cpp rename to src/Common/Scheduler/Nodes/tests/gtest_resource_class_priority.cpp index 83902453ed4..a447b7f6780 100644 --- a/src/IO/Resource/tests/gtest_resource_class_priority.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_class_priority.cpp @@ -1,14 +1,14 @@ #include -#include +#include -#include +#include using namespace DB; using ResourceTest = ResourceTestClass; -TEST(IOResourcePriorityPolicy, Factory) +TEST(SchedulerPriorityPolicy, Factory) { ResourceTest t; @@ -17,7 +17,7 @@ TEST(IOResourcePriorityPolicy, Factory) EXPECT_TRUE(dynamic_cast(prio.get()) != nullptr); } -TEST(IOResourcePriorityPolicy, Priorities) +TEST(SchedulerPriorityPolicy, Priorities) { ResourceTest t; @@ -51,7 +51,7 @@ TEST(IOResourcePriorityPolicy, Priorities) t.consumed("C", 0); } -TEST(IOResourcePriorityPolicy, Activation) +TEST(SchedulerPriorityPolicy, Activation) { ResourceTest t; @@ -92,7 +92,7 @@ TEST(IOResourcePriorityPolicy, Activation) t.consumed("C", 0); } -TEST(IOResourcePriorityPolicy, SinglePriority) +TEST(SchedulerPriorityPolicy, SinglePriority) { ResourceTest t; diff --git a/src/IO/Resource/tests/gtest_resource_scheduler.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp similarity index 95% rename from src/IO/Resource/tests/gtest_resource_scheduler.cpp rename to src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp index da2ffb2dd25..9fefbc02cbd 100644 --- a/src/IO/Resource/tests/gtest_resource_scheduler.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp @@ -1,8 +1,8 @@ #include -#include +#include -#include +#include #include @@ -73,7 +73,7 @@ struct ResourceHolder } }; -TEST(IOSchedulerRoot, Smoke) +TEST(SchedulerRoot, Smoke) { ResourceTest t; diff --git a/src/IO/Resource/tests/gtest_throttler_constraint.cpp b/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp similarity index 90% 
rename from src/IO/Resource/tests/gtest_throttler_constraint.cpp rename to src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp index e6d83d0d0ca..9703227ccfc 100644 --- a/src/IO/Resource/tests/gtest_throttler_constraint.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_throttler_constraint.cpp @@ -1,18 +1,18 @@ #include #include -#include +#include -#include -#include -#include "IO/ISchedulerNode.h" -#include "IO/ResourceRequest.h" +#include +#include +#include "Common/Scheduler/ISchedulerNode.h" +#include "Common/Scheduler/ResourceRequest.h" using namespace DB; using ResourceTest = ResourceTestClass; -TEST(IOResourceThrottlerConstraint, LeakyBucketConstraint) +TEST(SchedulerThrottlerConstraint, LeakyBucketConstraint) { ResourceTest t; EventQueue::TimePoint start = std::chrono::system_clock::now(); @@ -42,7 +42,7 @@ TEST(IOResourceThrottlerConstraint, LeakyBucketConstraint) t.consumed("A", 10); } -TEST(IOResourceThrottlerConstraint, Unlimited) +TEST(SchedulerThrottlerConstraint, Unlimited) { ResourceTest t; EventQueue::TimePoint start = std::chrono::system_clock::now(); @@ -59,7 +59,7 @@ TEST(IOResourceThrottlerConstraint, Unlimited) } } -TEST(IOResourceThrottlerConstraint, Pacing) +TEST(SchedulerThrottlerConstraint, Pacing) { ResourceTest t; EventQueue::TimePoint start = std::chrono::system_clock::now(); @@ -79,7 +79,7 @@ TEST(IOResourceThrottlerConstraint, Pacing) } } -TEST(IOResourceThrottlerConstraint, BucketFilling) +TEST(SchedulerThrottlerConstraint, BucketFilling) { ResourceTest t; EventQueue::TimePoint start = std::chrono::system_clock::now(); @@ -113,7 +113,7 @@ TEST(IOResourceThrottlerConstraint, BucketFilling) t.consumed("A", 3); } -TEST(IOResourceThrottlerConstraint, PeekAndAvgLimits) +TEST(SchedulerThrottlerConstraint, PeekAndAvgLimits) { ResourceTest t; EventQueue::TimePoint start = std::chrono::system_clock::now(); @@ -141,7 +141,7 @@ TEST(IOResourceThrottlerConstraint, PeekAndAvgLimits) } } -TEST(IOResourceThrottlerConstraint, ThrottlerAndFairness) +TEST(SchedulerThrottlerConstraint, ThrottlerAndFairness) { ResourceTest t; EventQueue::TimePoint start = std::chrono::system_clock::now(); diff --git a/src/IO/ResourceBudget.h b/src/Common/Scheduler/ResourceBudget.h similarity index 97% rename from src/IO/ResourceBudget.h rename to src/Common/Scheduler/ResourceBudget.h index 0adad45ba91..118720fae39 100644 --- a/src/IO/ResourceBudget.h +++ b/src/Common/Scheduler/ResourceBudget.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace DB diff --git a/src/IO/ResourceGuard.h b/src/Common/Scheduler/ResourceGuard.h similarity index 96% rename from src/IO/ResourceGuard.h rename to src/Common/Scheduler/ResourceGuard.h index 92f25b40f6b..dca4041b176 100644 --- a/src/IO/ResourceGuard.h +++ b/src/Common/Scheduler/ResourceGuard.h @@ -2,9 +2,9 @@ #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/src/IO/ResourceLink.h b/src/Common/Scheduler/ResourceLink.h similarity index 88% rename from src/IO/ResourceLink.h rename to src/Common/Scheduler/ResourceLink.h index 2da5e75fcba..108f51ac399 100644 --- a/src/IO/ResourceLink.h +++ b/src/Common/Scheduler/ResourceLink.h @@ -2,8 +2,8 @@ #include -#include -#include +#include +#include namespace DB diff --git a/src/IO/ResourceManagerFactory.h b/src/Common/Scheduler/ResourceManagerFactory.h similarity index 95% rename from src/IO/ResourceManagerFactory.h rename to src/Common/Scheduler/ResourceManagerFactory.h index 8e972f05640..52f271e51b1 100644 --- 
a/src/IO/ResourceManagerFactory.h +++ b/src/Common/Scheduler/ResourceManagerFactory.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include diff --git a/src/IO/ResourceRequest.h b/src/Common/Scheduler/ResourceRequest.h similarity index 100% rename from src/IO/ResourceRequest.h rename to src/Common/Scheduler/ResourceRequest.h diff --git a/src/IO/SchedulerRoot.h b/src/Common/Scheduler/SchedulerRoot.h similarity index 98% rename from src/IO/SchedulerRoot.h rename to src/Common/Scheduler/SchedulerRoot.h index 68aa14a289d..3a23a8df834 100644 --- a/src/IO/SchedulerRoot.h +++ b/src/Common/Scheduler/SchedulerRoot.h @@ -5,8 +5,8 @@ #include #include -#include -#include +#include +#include #include diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index e03036747f9..229150ecccb 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -108,7 +108,7 @@ String toString(TargetArch arch); #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw"))) #define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f"))) -#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2"))) +#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,bmi2"))) #define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx")) #define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt"))) #define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE @@ -122,7 +122,7 @@ String toString(TargetArch arch); # define BEGIN_AVX512F_SPECIFIC_CODE \ _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f\"))),apply_to=function)") # define BEGIN_AVX2_SPECIFIC_CODE \ - _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2\"))),apply_to=function)") + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,bmi2\"))),apply_to=function)") # define BEGIN_AVX_SPECIFIC_CODE \ _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx\"))),apply_to=function)") # define BEGIN_SSE42_SPECIFIC_CODE \ @@ -140,7 +140,7 @@ String toString(TargetArch arch); #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native"))) #define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native"))) -#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,tune=native"))) +#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,bmi2,tune=native"))) #define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native"))) #define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native))) #define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE @@ -159,7 +159,7 @@ String 
toString(TargetArch arch); _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native\")") # define BEGIN_AVX2_SPECIFIC_CODE \ _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,tune=native\")") + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,bmi2,tune=native\")") # define BEGIN_AVX_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native\")") diff --git a/src/Common/getMaxFileDescriptorCount.cpp b/src/Common/getMaxFileDescriptorCount.cpp index 8ea55a23386..a2449f80889 100644 --- a/src/Common/getMaxFileDescriptorCount.cpp +++ b/src/Common/getMaxFileDescriptorCount.cpp @@ -1,36 +1,19 @@ -#include -#include -#include -#include #include -#include +#include -int getMaxFileDescriptorCount() +std::optional getMaxFileDescriptorCount() { - namespace fs = std::filesystem; - int result = -1; #if defined(OS_LINUX) || defined(OS_DARWIN) - using namespace DB; - - if (fs::exists("/proc/sys/fs/file-max")) + /// We want to calculate it only once. + static auto result = []() -> std::optional { - ReadBufferFromFile reader("/proc/sys/fs/file-max"); - readIntText(result, reader); - } - else - { - auto command = ShellCommand::execute("ulimit -n"); - try - { - readIntText(result, command->out); - command->wait(); - } - catch (...) // NOLINT(bugprone-empty-catch) - { - } - } - -#endif - + rlimit rlim; + if (0 != getrlimit(RLIMIT_NOFILE, &rlim)) + return std::nullopt; + return rlim.rlim_max; + }(); return result; +#else + return std::nullopt; +#endif } diff --git a/src/Common/getMaxFileDescriptorCount.h b/src/Common/getMaxFileDescriptorCount.h index 6cc5df0d14c..bf0620fb6f9 100644 --- a/src/Common/getMaxFileDescriptorCount.h +++ b/src/Common/getMaxFileDescriptorCount.h @@ -1,6 +1,8 @@ #pragma once -/// Get process max file descriptor count -/// @return -1 if os does not support ulimit command or some error occurs -int getMaxFileDescriptorCount(); +#include + +/// Get process max file descriptor count +/// @return std::nullopt if os does not support getrlimit command or some error occurs +std::optional getMaxFileDescriptorCount(); diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 803c6eb594e..af2e4ec5a34 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -300,7 +300,11 @@ String MonitorCommand::run() #if defined(OS_LINUX) || defined(OS_DARWIN) print(ret, "open_file_descriptor_count", getCurrentProcessFDCount()); - print(ret, "max_file_descriptor_count", getMaxFileDescriptorCount()); + auto max_file_descriptor_count = getMaxFileDescriptorCount(); + if (max_file_descriptor_count.has_value()) + print(ret, "max_file_descriptor_count", *max_file_descriptor_count); + else + print(ret, "max_file_descriptor_count", -1); #endif if (keeper_info.is_leader) diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index c48b6f41fb5..e3e91e6bd07 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -22,7 +22,7 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM size_t key_arena_size = 0; size_t latest_snapshot_size = 0; size_t open_file_descriptor_count = 0; - size_t max_file_descriptor_count = 0; + std::optional max_file_descriptor_count = 0; size_t followers = 0; size_t synced_followers = 0; size_t zxid = 0; @@ -79,7 +79,10 @@ void 
updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM new_values["KeeperLatestSnapshotSize"] = { latest_snapshot_size, "The uncompressed size in bytes of the latest snapshot created by ClickHouse Keeper." }; new_values["KeeperOpenFileDescriptorCount"] = { open_file_descriptor_count, "The number of open file descriptors in ClickHouse Keeper." }; - new_values["KeeperMaxFileDescriptorCount"] = { max_file_descriptor_count, "The maximum number of open file descriptors in ClickHouse Keeper." }; + if (max_file_descriptor_count.has_value()) + new_values["KeeperMaxFileDescriptorCount"] = { *max_file_descriptor_count, "The maximum number of open file descriptors in ClickHouse Keeper." }; + else + new_values["KeeperMaxFileDescriptorCount"] = { -1, "The maximum number of open file descriptors in ClickHouse Keeper." }; new_values["KeeperFollowers"] = { followers, "The number of followers of ClickHouse Keeper." }; new_values["KeeperSyncedFollowers"] = { synced_followers, "The number of followers of ClickHouse Keeper who are also in-sync." }; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 9947d409c79..8bf48f23f50 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -134,7 +135,7 @@ void KeeperDispatcher::requestThread() Int64 mem_soft_limit = keeper_context->getKeeperMemorySoftLimit(); if (configuration_and_settings->standalone_keeper && isExceedingMemorySoftLimit() && checkIfRequestIncreaseMem(request.request)) { - LOG_TRACE(log, "Processing requests refused because of max_memory_usage_soft_limit {}, the total used memory is {}, request type is {}", mem_soft_limit, total_memory_tracker.get(), request.request->getOpNum()); + LOG_WARNING(log, "Processing requests refused because of max_memory_usage_soft_limit {}, the total used memory is {}, request type is {}", ReadableSize(mem_soft_limit), ReadableSize(total_memory_tracker.get()), request.request->getOpNum()); addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS); continue; } diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index a6199864422..49ad2b568fe 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include diff --git a/src/Core/Defines.h b/src/Core/Defines.h index a3ab76c0b93..9e169642512 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -8,9 +8,11 @@ namespace DB static constexpr auto DBMS_DEFAULT_PORT = 9000; static constexpr auto DBMS_DEFAULT_SECURE_PORT = 9440; + static constexpr auto DBMS_DEFAULT_CONNECT_TIMEOUT_SEC = 10; static constexpr auto DBMS_DEFAULT_SEND_TIMEOUT_SEC = 300; static constexpr auto DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC = 300; + /// Timeout for synchronous request-result protocol call (like Ping or TablesStatus). 
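A minimal standalone illustration of the descriptor-limit change a few files above (getMaxFileDescriptorCount now queries getrlimit and reports failure as an empty optional instead of -1). This is a sketch, not ClickHouse code: the optional's element type (rlim_t here) and the -1 fallback are assumptions that mirror the Keeper call sites in this diff, and it is POSIX-only like the guarded implementation.

#include <optional>
#include <sys/resource.h>

/// Same idea as the new implementation: return std::nullopt on failure
/// instead of a -1 sentinel, and read the hard limit via getrlimit().
static std::optional<rlim_t> getMaxFileDescriptorCountSketch()
{
    rlimit limit{};
    if (0 != getrlimit(RLIMIT_NOFILE, &limit))
        return std::nullopt;
    return limit.rlim_max;
}

int main()
{
    /// Call sites that still need a plain number (e.g. a metrics line)
    /// can degrade to -1, as MonitorCommand::run() does in this diff.
    auto max_fds = getMaxFileDescriptorCountSketch();
    long long reported = max_fds ? static_cast<long long>(*max_fds) : -1;
    (void)reported;
    return 0;
}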
static constexpr auto DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC = 5; static constexpr auto DBMS_DEFAULT_POLL_INTERVAL = 10; @@ -51,6 +53,7 @@ static constexpr auto DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT = 1; /// the number is unmotivated static constexpr auto DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT = 15; +static constexpr auto DEFAULT_TCP_KEEP_ALIVE_TIMEOUT = 290; static constexpr auto DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT = 30; static constexpr auto DBMS_DEFAULT_PATH = "/var/lib/clickhouse/"; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 2a9fa8e744c..8d3dec4c5c0 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -40,6 +40,7 @@ namespace DB M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \ M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \ M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \ + M(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \ M(Int32, max_connections, 1024, "Max server connections.", 0) \ M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \ M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 292e945a29c..305d6466658 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -26,9 +26,8 @@ class IColumn; * `flags` can be either 0 or IMPORTANT. * A setting is "IMPORTANT" if it affects the results of queries and can't be ignored by older versions. * - * When adding new settings that control some backward incompatible changes or when changing some settings values, - * consider adding them to settings changes history in SettingsChangesHistory.h for special `compatibility` setting - * to work correctly. + * When adding new or changing existing settings add them to settings changes history in SettingsChangesHistory.h + * for tracking settings changes in different versions and for special `compatibility` setting to work correctly. */ #define COMMON_SETTINGS(M, ALIAS) \ @@ -62,7 +61,7 @@ class IColumn; M(Milliseconds, connect_timeout_with_failover_secure_ms, 1000, "Connection timeout for selecting first healthy replica (for secure connections).", 0) \ M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "Timeout for receiving data from network, in seconds. If no bytes were received in this interval, exception is thrown. If you set this setting on client, the 'send_timeout' for the socket will be also set on the corresponding connection end on the server.", 0) \ M(Seconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, "Timeout for sending data to network, in seconds. If client needs to sent some data, but it did not able to send any bytes in this interval, exception is thrown. 
If you set this setting on client, the 'receive_timeout' for the socket will be also set on the corresponding connection end on the server.", 0) \ - M(Seconds, tcp_keep_alive_timeout, 290 /* less than DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC */, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \ + M(Seconds, tcp_keep_alive_timeout, DEFAULT_TCP_KEEP_ALIVE_TIMEOUT /* less than DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC */, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \ M(Milliseconds, hedged_connection_timeout_ms, 50, "Connection timeout for establishing connection with replica for Hedged requests", 0) \ M(Milliseconds, receive_data_timeout_ms, 2000, "Connection timeout for receiving first packet of data or packet with positive progress from replica", 0) \ M(Bool, use_hedged_requests, true, "Use hedged requests for distributed queries", 0) \ @@ -408,6 +407,7 @@ class IColumn; M(UInt64, min_execution_speed_bytes, 0, "Minimum number of execution bytes per second.", 0) \ M(UInt64, max_execution_speed_bytes, 0, "Maximum number of execution bytes per second.", 0) \ M(Seconds, timeout_before_checking_execution_speed, 10, "Check that the speed is not too low after the specified time has elapsed.", 0) \ + M(Seconds, max_estimated_execution_time, 0, "Maximum query estimate execution time in seconds.", 0) \ \ M(UInt64, max_columns_to_read, 0, "If a query requires reading more than specified number of columns, exception is thrown. Zero value means unlimited. This setting is useful to prevent too complex queries.", 0) \ M(UInt64, max_temporary_columns, 0, "If a query generates more than the specified number of temporary columns in memory as a result of intermediate calculation, exception is thrown. Zero value means unlimited. This setting is useful to prevent too complex queries.", 0) \ @@ -725,6 +725,7 @@ class IColumn; \ M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ + M(UInt64, function_visible_width_behavior, 1, "The version of `visibleWidth` behavior. 0 - only count the number of code points; 1 - correctly count zero-width and combining characters, count full-width characters as two, estimate the tab width, count delete characters.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. 
The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 62ffd837a33..9c8c46d5ce6 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -77,18 +77,30 @@ namespace SettingsChangesHistory /// History of settings changes that controls some backward incompatible changes /// across all ClickHouse versions. It maps ClickHouse version to settings changes that were done -/// in this version. Settings changes is a vector of structs {setting_name, previous_value, new_value} +/// in this version. This history contains both changes to existing settings and newly added settings. +/// Settings changes is a vector of structs {setting_name, previous_value, new_value}. +/// For newly added setting choose the most appropriate previous_value (for example, if new setting +/// controls new feature and it's 'true' by default, use 'false' as previous_value). /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}}}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}}}, {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}, - {"output_format_pretty_color", true, 
"auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}}}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 9cce59b0dca..f5d1ea5d877 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -56,7 +56,7 @@ static std::optional checkTupleNames(const Strings & names) return Exception(ErrorCodes::BAD_ARGUMENTS, "Names of tuple elements cannot be empty"); if (!names_set.insert(name).second) - return Exception(ErrorCodes::DUPLICATE_COLUMN, "Names of tuple elements must be unique"); + return Exception(ErrorCodes::DUPLICATE_COLUMN, "Names of tuple elements must be unique. Duplicate name: {}", name); } return {}; diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index ac81899156a..68fd9012857 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -455,7 +455,8 @@ void registerDiskEncrypted(DiskFactory & factory, bool global_skip_access_check) const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const DisksMap & map) -> DiskPtr + const DisksMap & map, + bool, bool) -> DiskPtr { bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); DiskPtr disk = std::make_shared(name, config, config_prefix, map); diff --git a/src/Disks/DiskFactory.cpp b/src/Disks/DiskFactory.cpp index 2261b24272c..de7ee5a74f4 100644 --- a/src/Disks/DiskFactory.cpp +++ b/src/Disks/DiskFactory.cpp @@ -25,7 +25,9 @@ DiskPtr DiskFactory::create( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const DisksMap & map) const + const DisksMap & map, + bool attach, + bool custom_disk) const { const auto disk_type = config.getString(config_prefix + ".type", "local"); @@ -37,7 +39,7 @@ DiskPtr DiskFactory::create( } const auto & disk_creator = found->second; - return disk_creator(name, config, config_prefix, context, map); + return disk_creator(name, config, config_prefix, context, map, attach, custom_disk); } } diff --git a/src/Disks/DiskFactory.h b/src/Disks/DiskFactory.h index 998ef569ed5..d03ffa6a40f 100644 --- a/src/Disks/DiskFactory.h +++ b/src/Disks/DiskFactory.h @@ -27,7 +27,9 @@ public: const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const DisksMap & map)>; + const DisksMap & map, + bool attach, + bool custom_disk)>; static DiskFactory & instance(); @@ -38,7 +40,9 @@ public: const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const DisksMap & map) const; + const DisksMap & map, + bool attach = false, + bool custom_disk = false) const; private: using DiskTypeRegistry = std::unordered_map; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 5e77ff61789..07c2beb002d 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -728,7 +728,8 @@ void registerDiskLocal(DiskFactory & factory, bool global_skip_access_check) const 
Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const DisksMap & map) -> DiskPtr + const DisksMap & map, + bool, bool) -> DiskPtr { String path; UInt64 keep_free_space_bytes; diff --git a/src/Disks/DiskType.cpp b/src/Disks/DiskType.cpp index 59e242c7fe0..218b6ee7f26 100644 --- a/src/Disks/DiskType.cpp +++ b/src/Disks/DiskType.cpp @@ -3,38 +3,15 @@ namespace DB { -String toString(DataSourceType data_source_type) -{ - switch (data_source_type) - { - case DataSourceType::Local: - return "local"; - case DataSourceType::RAM: - return "memory"; - case DataSourceType::S3: - return "s3"; - case DataSourceType::S3_Plain: - return "s3_plain"; - case DataSourceType::HDFS: - return "hdfs"; - case DataSourceType::WebServer: - return "web"; - case DataSourceType::AzureBlobStorage: - return "azure_blob_storage"; - case DataSourceType::LocalBlobStorage: - return "local_blob_storage"; - } - std::unreachable; -} - bool DataSourceDescription::operator==(const DataSourceDescription & other) const { - return std::tie(type, description, is_encrypted) == std::tie(other.type, other.description, other.is_encrypted); + return std::tie(type, object_storage_type, description, is_encrypted) == std::tie(other.type, other.object_storage_type, other.description, other.is_encrypted); } bool DataSourceDescription::sameKind(const DataSourceDescription & other) const { - return std::tie(type, description) == std::tie(other.type, other.description); + return std::tie(type, object_storage_type, description) + == std::tie(other.type, other.object_storage_type, other.description); } } diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index 82a00ccb3cc..15940ea9155 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -10,12 +10,27 @@ enum class DataSourceType { Local, RAM, + ObjectStorage, +}; + +enum class ObjectStorageType +{ + None, S3, S3_Plain, + Azure, HDFS, - WebServer, - AzureBlobStorage, - LocalBlobStorage, + Web, + Local, +}; + +enum class MetadataStorageType +{ + None, + Local, + Plain, + StaticWeb, + Memory, }; String toString(DataSourceType data_source_type); @@ -23,6 +38,9 @@ String toString(DataSourceType data_source_type); struct DataSourceDescription { DataSourceType type; + ObjectStorageType object_storage_type = ObjectStorageType::None; + MetadataStorageType metadata_type = MetadataStorageType::None; + std::string description; bool is_encrypted = false; @@ -30,6 +48,37 @@ struct DataSourceDescription bool operator==(const DataSourceDescription & other) const; bool sameKind(const DataSourceDescription & other) const; + + std::string toString() const + { + switch (type) + { + case DataSourceType::Local: + return "local"; + case DataSourceType::RAM: + return "memory"; + case DataSourceType::ObjectStorage: + { + switch (object_storage_type) + { + case ObjectStorageType::S3: + return "s3"; + case ObjectStorageType::S3_Plain: + return "s3_plain"; + case ObjectStorageType::HDFS: + return "hdfs"; + case ObjectStorageType::Azure: + return "azure_blob_storage"; + case ObjectStorageType::Local: + return "local_blob_storage"; + case ObjectStorageType::Web: + return "web"; + case ObjectStorageType::None: + return "none"; + } + } + } + } }; } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 8aaee17f237..3d228850537 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -293,7 +293,7 @@ public: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getCacheLayersNames()` is not implemented for disk: {}", - toString(getDataSourceDescription().type)); 
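One consequence of the DiskType.h rework above is worth spelling out: every remote disk now reports DataSourceType::ObjectStorage, so equality and sameKind() must also compare the nested object storage type, exactly as DiskType.cpp does in this diff. A simplified, self-contained sketch of that comparison (trimmed enums and a renamed struct; not the full ClickHouse definitions):

#include <string>
#include <tuple>

enum class DataSourceType { Local, RAM, ObjectStorage };
enum class ObjectStorageType { None, S3, Azure };

struct DataSourceDescriptionSketch
{
    DataSourceType type;
    ObjectStorageType object_storage_type = ObjectStorageType::None;
    std::string description;

    /// Two object-storage disks only count as the same kind when the nested
    /// object storage type (and endpoint description) also match.
    bool sameKind(const DataSourceDescriptionSketch & other) const
    {
        return std::tie(type, object_storage_type, description)
            == std::tie(other.type, other.object_storage_type, other.description);
    }
};

/// Example: an S3 disk and an Azure disk share type == ObjectStorage,
/// but sameKind() is false because object_storage_type differs.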
+ getDataSourceDescription().toString()); } /// Returns a list of storage objects (contains path, size, ...). @@ -303,7 +303,7 @@ public: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getStorageObjects()` not implemented for disk: {}", - toString(getDataSourceDescription().type)); + getDataSourceDescription().toString()); } /// For one local path there might be multiple remote paths in case of Log family engines. @@ -324,7 +324,7 @@ public: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePathsRecursive() not implemented for disk: {}`", - toString(getDataSourceDescription().type)); + getDataSourceDescription().toString()); } /// Batch request to remove multiple files. @@ -412,7 +412,7 @@ public: throw Exception( ErrorCodes::NOT_IMPLEMENTED, "Method getMetadataStorage() is not implemented for disk type: {}", - toString(getDataSourceDescription().type)); + getDataSourceDescription().toString()); } /// Very similar case as for getMetadataDiskIfExistsOrSelf(). If disk has "metadata" @@ -446,7 +446,7 @@ public: throw Exception( ErrorCodes::NOT_IMPLEMENTED, "Method getObjectStorage() is not implemented for disk type: {}", - toString(getDataSourceDescription().type)); + getDataSourceDescription().toString()); } /// Create disk object storage according to disk type. @@ -457,7 +457,7 @@ public: throw Exception( ErrorCodes::NOT_IMPLEMENTED, "Method createDiskObjectStorage() is not implemented for disk type: {}", - toString(getDataSourceDescription().type)); + getDataSourceDescription().toString()); } virtual bool supportsStat() const { return false; } diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp b/src/Disks/IO/ReadBufferFromWebServer.cpp index 90cd5285875..7a602856183 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.cpp +++ b/src/Disks/IO/ReadBufferFromWebServer.cpp @@ -56,15 +56,15 @@ std::unique_ptr ReadBufferFromWebServer::initialize() const auto & settings = context->getSettingsRef(); const auto & server_settings = context->getServerSettings(); + auto connection_timeouts = ConnectionTimeouts::getHTTPTimeouts(settings, server_settings.keep_alive_timeout); + connection_timeouts.withConnectionTimeout(std::max(settings.http_connection_timeout, Poco::Timespan(20, 0))); + connection_timeouts.withReceiveTimeout(std::max(settings.http_receive_timeout, Poco::Timespan(20, 0))); + auto res = std::make_unique( uri, Poco::Net::HTTPRequest::HTTP_GET, ReadWriteBufferFromHTTP::OutStreamCallback(), - ConnectionTimeouts(std::max(Poco::Timespan(settings.http_connection_timeout.totalSeconds(), 0), Poco::Timespan(20, 0)), - settings.http_send_timeout, - std::max(Poco::Timespan(settings.http_receive_timeout.totalSeconds(), 0), Poco::Timespan(20, 0)), - settings.tcp_keep_alive_timeout, - server_settings.keep_alive_timeout), + connection_timeouts, credentials, 0, buf_size, diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 60bc04f5f95..a2e84edf45f 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace ProfileEvents diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 068e2aebab1..93fed1e61bf 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -98,10 +98,6 @@ 
AzureObjectStorage::AzureObjectStorage( , settings(std::move(settings_)) , log(&Poco::Logger::get("AzureObjectStorage")) { - data_source_description.type = DataSourceType::AzureBlobStorage; - data_source_description.description = client.get()->GetUrl(); - data_source_description.is_cached = false; - data_source_description.is_encrypted = false; } ObjectStorageKey AzureObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 4718dce9bf9..85213a3c24f 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -59,11 +59,13 @@ public: ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override; - DataSourceDescription getDataSourceDescription() const override { return data_source_description; } - std::string getName() const override { return "AzureObjectStorage"; } - std::string getCommonKeyPrefix() const override { return ""; } /// No namespaces in azure. + ObjectStorageType getType() const override { return ObjectStorageType::Azure; } + + std::string getCommonKeyPrefix() const override { return ""; } + + std::string getDescription() const override { return client.get()->GetUrl(); } bool exists(const StoredObject & object) const override; @@ -133,8 +135,6 @@ private: MultiVersion settings; Poco::Logger * log; - - DataSourceDescription data_source_description; }; } diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index 45fcb482f0c..d4aba197ae4 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -29,13 +29,6 @@ CachedObjectStorage::CachedObjectStorage( cache->initialize(); } -DataSourceDescription CachedObjectStorage::getDataSourceDescription() const -{ - auto wrapped_object_storage_data_source = object_storage->getDataSourceDescription(); - wrapped_object_storage_data_source.is_cached = true; - return wrapped_object_storage_data_source; -} - FileCache::Key CachedObjectStorage::getCacheKey(const std::string & path) const { return cache->createKeyForPath(path); diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 4c185db051d..7dcede2c0bc 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -20,12 +20,14 @@ class CachedObjectStorage final : public IObjectStorage public: CachedObjectStorage(ObjectStoragePtr object_storage_, FileCachePtr cache_, const FileCacheSettings & cache_settings_, const String & cache_config_name_); - DataSourceDescription getDataSourceDescription() const override; - std::string getName() const override { return fmt::format("CachedObjectStorage-{}({})", cache_config_name, object_storage->getName()); } + ObjectStorageType getType() const override { return object_storage->getType(); } + std::string getCommonKeyPrefix() const override { return object_storage->getCommonKeyPrefix(); } + std::string getDescription() const override { return object_storage->getDescription(); } + bool exists(const StoredObject & object) const override; std::unique_ptr readObject( /// NOLINT diff --git a/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp b/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp index 
11117de67e0..5d788f2641a 100644 --- a/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp +++ b/src/Disks/ObjectStorages/Cached/registerDiskCache.cpp @@ -23,8 +23,10 @@ void registerDiskCache(DiskFactory & factory, bool /* global_skip_access_check * const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const DisksMap & map) -> DiskPtr - { + const DisksMap & map, + bool attach, + bool custom_disk) -> DiskPtr +{ auto disk_name = config.getString(config_prefix + ".disk", ""); if (disk_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk Cache requires `disk` field in config"); @@ -49,7 +51,40 @@ void registerDiskCache(DiskFactory & factory, bool /* global_skip_access_check * file_cache_settings.loadFromConfig(config, config_prefix); auto config_fs_caches_dir = context->getFilesystemCachesPath(); - if (config_fs_caches_dir.empty()) + if (custom_disk) + { + static constexpr auto custom_cached_disks_base_dir_in_config = "custom_cached_disks_base_directory"; + auto custom_cached_disk_path_prefix = context->getConfigRef().getString(custom_cached_disks_base_dir_in_config, config_fs_caches_dir); + if (custom_cached_disk_path_prefix.empty()) + { + if (!attach) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot create cached custom disk without either " + "`filesystem_caches_path` (common for all filesystem caches) or" + "`custom_cached_disks_base_directory` (common only for custom cached disks) in server configuration file"); + } + if (fs::path(file_cache_settings.base_path).is_relative()) + { + /// Compatibility prefix. + file_cache_settings.base_path = fs::path(context->getPath()) / "caches" / file_cache_settings.base_path; + } + } + else + { + if (fs::path(file_cache_settings.base_path).is_relative()) + file_cache_settings.base_path = fs::path(custom_cached_disk_path_prefix) / file_cache_settings.base_path; + + if (!attach && !pathStartsWith(file_cache_settings.base_path, custom_cached_disk_path_prefix)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Filesystem cache path must lie inside `{}` (for disk: {})", + config_fs_caches_dir, name); + } + } + } + else if (config_fs_caches_dir.empty()) { if (fs::path(file_cache_settings.base_path).is_relative()) file_cache_settings.base_path = fs::path(context->getPath()) / "caches" / file_cache_settings.base_path; @@ -59,7 +94,7 @@ void registerDiskCache(DiskFactory & factory, bool /* global_skip_access_check * if (fs::path(file_cache_settings.base_path).is_relative()) file_cache_settings.base_path = fs::path(config_fs_caches_dir) / file_cache_settings.base_path; - if (!pathStartsWith(file_cache_settings.base_path, config_fs_caches_dir)) + if (!attach && !pathStartsWith(file_cache_settings.base_path, config_fs_caches_dir)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem cache path {} must lie inside default filesystem cache path `{}`", diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 6962248c7e1..b6445d5763a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -61,21 +61,29 @@ DiskTransactionPtr DiskObjectStorage::createObjectStorageTransactionToAnotherDis DiskObjectStorage::DiskObjectStorage( const String & name_, const String & object_key_prefix_, - const String & log_name, MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) : IDisk(name_, config, 
config_prefix) , object_key_prefix(object_key_prefix_) - , log (&Poco::Logger::get("DiskObjectStorage(" + log_name + ")")) + , log (&Poco::Logger::get("DiskObjectStorage(" + name + ")")) , metadata_storage(std::move(metadata_storage_)) , object_storage(std::move(object_storage_)) , send_metadata(config.getBool(config_prefix + ".send_metadata", false)) , read_resource_name(config.getString(config_prefix + ".read_resource", "")) , write_resource_name(config.getString(config_prefix + ".write_resource", "")) , metadata_helper(std::make_unique(this, ReadSettings{}, WriteSettings{})) -{} +{ + data_source_description = DataSourceDescription{ + .type = DataSourceType::ObjectStorage, + .object_storage_type = object_storage->getType(), + .metadata_type = metadata_storage->getType(), + .description = object_storage->getDescription(), + .is_encrypted = false, + .is_cached = object_storage->supportsCache(), + }; +} StoredObjects DiskObjectStorage::getStorageObjects(const String & local_path) const { @@ -480,7 +488,6 @@ DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage() return std::make_shared( getName(), object_key_prefix, - getName(), metadata_storage, object_storage, Context::getGlobalContextInstance()->getConfigRef(), diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index a664f11fab7..983af483b8a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -30,7 +30,6 @@ public: DiskObjectStorage( const String & name_, const String & object_key_prefix_, - const String & log_name, MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, const Poco::Util::AbstractConfiguration & config, @@ -39,7 +38,7 @@ public: /// Create fake transaction DiskTransactionPtr createTransaction() override; - DataSourceDescription getDataSourceDescription() const override { return object_storage->getDataSourceDescription(); } + DataSourceDescription getDataSourceDescription() const override { return data_source_description; } bool supportZeroCopyReplication() const override { return true; } @@ -224,6 +223,7 @@ private: MetadataStoragePtr metadata_storage; ObjectStoragePtr object_storage; + DataSourceDescription data_source_description; UInt64 reserved_bytes = 0; UInt64 reservation_count = 0; diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 662b20f4d31..fa5e227d853 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -31,7 +31,7 @@ void HDFSObjectStorage::startup() ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { /// what ever data_source_description.description value is, consider that key as relative key - return ObjectStorageKey::createAsRelative(data_source_description.description, getRandomASCIIString(32)); + return ObjectStorageKey::createAsRelative(hdfs_root_path, getRandomASCIIString(32)); } bool HDFSObjectStorage::exists(const StoredObject & object) const diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 82458974278..66095eb9f8f 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -50,20 +50,15 @@ public: , settings(std::move(settings_)) , hdfs_root_path(hdfs_root_path_) { - data_source_description.type = DataSourceType::HDFS; - 
data_source_description.description = hdfs_root_path_; - data_source_description.is_cached = false; - data_source_description.is_encrypted = false; } std::string getName() const override { return "HDFSObjectStorage"; } std::string getCommonKeyPrefix() const override { return hdfs_root_path; } - DataSourceDescription getDataSourceDescription() const override - { - return data_source_description; - } + std::string getDescription() const override { return hdfs_root_path; } + + ObjectStorageType getType() const override { return ObjectStorageType::HDFS; } bool exists(const StoredObject & object) const override; @@ -128,8 +123,6 @@ private: HDFSFSPtr hdfs_fs; SettingsPtr settings; const std::string hdfs_root_path; - - DataSourceDescription data_source_description; }; } diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index 9e5078736d2..f08b0d594bd 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -11,6 +11,7 @@ #include #include #include +#include #include namespace DB @@ -164,6 +165,8 @@ public: /// Get metadata root path. virtual const std::string & getPath() const = 0; + virtual MetadataStorageType getType() const = 0; + /// ==== General purpose methods. Define properties of object storage file based on metadata files ==== virtual bool exists(const std::string & path) const = 0; diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index f405be72287..e5a393d3a59 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -80,12 +80,14 @@ class IObjectStorage public: IObjectStorage() = default; - virtual DataSourceDescription getDataSourceDescription() const = 0; - virtual std::string getName() const = 0; + virtual ObjectStorageType getType() const = 0; + virtual std::string getCommonKeyPrefix() const = 0; + virtual std::string getDescription() const = 0; + /// Object exists or not virtual bool exists(const StoredObject & object) const = 0; diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index c4451ebabf4..b3c1c3202a5 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -27,14 +27,10 @@ LocalObjectStorage::LocalObjectStorage(String key_prefix_) : key_prefix(std::move(key_prefix_)) , log(&Poco::Logger::get("LocalObjectStorage")) { - data_source_description.type = DataSourceType::LocalBlobStorage; if (auto block_device_id = tryGetBlockDeviceId("/"); block_device_id.has_value()) - data_source_description.description = *block_device_id; + description = *block_device_id; else - data_source_description.description = "/"; - - data_source_description.is_cached = false; - data_source_description.is_encrypted = false; + description = "/"; } bool LocalObjectStorage::exists(const StoredObject & object) const diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h index 313fd1190a3..522e73b415d 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h @@ -18,12 +18,14 @@ class LocalObjectStorage : public IObjectStorage public: explicit LocalObjectStorage(String key_prefix_); - DataSourceDescription getDataSourceDescription() const override { return data_source_description; } - std::string getName() const override { return 
"LocalObjectStorage"; } + ObjectStorageType getType() const override { return ObjectStorageType::Local; } + std::string getCommonKeyPrefix() const override { return key_prefix; } + std::string getDescription() const override { return description; } + bool exists(const StoredObject & object) const override; std::unique_ptr readObject( /// NOLINT @@ -89,7 +91,7 @@ public: private: String key_prefix; Poco::Logger * log; - DataSourceDescription data_source_description; + std::string description; }; } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index 4116659ab9a..7059d8e9a6a 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -32,6 +32,8 @@ public: const std::string & getPath() const override; + MetadataStorageType getType() const override { return MetadataStorageType::Local; } + bool exists(const std::string & path) const override; bool isFile(const std::string & path) const override; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 2ef823d07a4..1bb008c0c19 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -38,6 +38,8 @@ public: const std::string & getPath() const override; + MetadataStorageType getType() const override { return MetadataStorageType::Plain; } + bool exists(const std::string & path) const override; bool isFile(const std::string & path) const override; diff --git a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp index c2e599f4608..383a0b079b5 100644 --- a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp @@ -6,31 +6,25 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_ELEMENT_IN_CONFIG; -} void registerObjectStorages(); void registerMetadataStorages(); -static std::string getCompatibilityMetadataTypeHint(const DataSourceDescription & description) +static std::string getCompatibilityMetadataTypeHint(const ObjectStorageType & type) { - switch (description.type) + switch (type) { - case DataSourceType::S3: - case DataSourceType::HDFS: - case DataSourceType::LocalBlobStorage: - case DataSourceType::AzureBlobStorage: + case ObjectStorageType::S3: + case ObjectStorageType::HDFS: + case ObjectStorageType::Local: + case ObjectStorageType::Azure: return "local"; - case DataSourceType::S3_Plain: + case ObjectStorageType::S3_Plain: return "plain"; - case DataSourceType::WebServer: + case ObjectStorageType::Web: return "web"; - default: - throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, - "Cannot get compatibility metadata hint: " - "no such object storage type: {}", toString(description.type)); + case ObjectStorageType::None: + return ""; } UNREACHABLE(); } @@ -45,13 +39,16 @@ void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_ch const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const DisksMap & /*map*/) -> DiskPtr + const DisksMap & /* map */, + bool, bool) -> DiskPtr { bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); auto object_storage = ObjectStorageFactory::instance().create(name, config, config_prefix, context, skip_access_check); - auto 
compatibility_metadata_type_hint = config.has("metadata_type") - ? "" - : getCompatibilityMetadataTypeHint(object_storage->getDataSourceDescription()); + std::string compatibility_metadata_type_hint; + if (!config.has(config_prefix + ".metadata_type")) + { + compatibility_metadata_type_hint = getCompatibilityMetadataTypeHint(object_storage->getType()); + } auto metadata_storage = MetadataStorageFactory::instance().create( name, config, config_prefix, object_storage, compatibility_metadata_type_hint); @@ -59,7 +56,6 @@ void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_ch DiskObjectStoragePtr disk = std::make_shared( name, object_storage->getCommonKeyPrefix(), - fmt::format("Disk_{}({})", toString(object_storage->getDataSourceDescription().type), name), std::move(metadata_storage), std::move(object_storage), config, diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 1323f3d01f0..820d4977f98 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -59,13 +59,8 @@ private: , client(std::move(client_)) , s3_settings(std::move(s3_settings_)) , s3_capabilities(s3_capabilities_) + , log(&Poco::Logger::get(logger_name)) { - data_source_description.type = DataSourceType::S3; - data_source_description.description = uri_.endpoint; - data_source_description.is_cached = false; - data_source_description.is_encrypted = false; - - log = &Poco::Logger::get(logger_name); } public: @@ -75,15 +70,14 @@ public: { } - DataSourceDescription getDataSourceDescription() const override - { - return data_source_description; - } - std::string getName() const override { return "S3ObjectStorage"; } std::string getCommonKeyPrefix() const override { return uri.key; } + std::string getDescription() const override { return uri.endpoint; } + + ObjectStorageType getType() const override { return ObjectStorageType::S3; } + bool exists(const StoredObject & object) const override; std::unique_ptr readObject( /// NOLINT @@ -186,7 +180,6 @@ private: S3Capabilities s3_capabilities; Poco::Logger * log; - DataSourceDescription data_source_description; }; /// Do not encode keys, store as-is, and do not require separate disk for metadata. @@ -200,10 +193,9 @@ public: template explicit S3PlainObjectStorage(Args && ...args) - : S3ObjectStorage("S3PlainObjectStorage", std::forward(args)...) - { - data_source_description.type = DataSourceType::S3_Plain; - } + : S3ObjectStorage("S3PlainObjectStorage", std::forward(args)...) 
{} + + ObjectStorageType getType() const override { return ObjectStorageType::S3_Plain; } /// Notes: /// - supports BACKUP to this disk diff --git a/src/Disks/ObjectStorages/StaticDirectoryIterator.h b/src/Disks/ObjectStorages/StaticDirectoryIterator.h index 891bdb688f0..3e4ad1c3ea9 100644 --- a/src/Disks/ObjectStorages/StaticDirectoryIterator.h +++ b/src/Disks/ObjectStorages/StaticDirectoryIterator.h @@ -22,7 +22,13 @@ public: std::string path() const override { return iter->string(); } - std::string name() const override { return iter->filename(); } + std::string name() const override + { + if (iter->filename().empty()) + return iter->parent_path().filename(); + else + return iter->filename(); + } private: std::vector dir_file_paths; diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp index 6e9c767a4fd..5ab9d3f3631 100644 --- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp @@ -61,21 +61,21 @@ bool MetadataStorageFromStaticFilesWebServer::isFile(const std::string & path) c { assertExists(path); auto file_info = object_storage.getFileInfo(path); - return file_info.type == WebObjectStorage::FileType::File; + return file_info->type == WebObjectStorage::FileType::File; } bool MetadataStorageFromStaticFilesWebServer::isDirectory(const std::string & path) const { assertExists(path); auto file_info = object_storage.getFileInfo(path); - return file_info.type == WebObjectStorage::FileType::Directory; + return file_info->type == WebObjectStorage::FileType::Directory; } uint64_t MetadataStorageFromStaticFilesWebServer::getFileSize(const String & path) const { assertExists(path); auto file_info = object_storage.getFileInfo(path); - return file_info.size; + return file_info->size; } StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const std::string & path) const @@ -87,7 +87,7 @@ StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const s remote_path = remote_path.substr(object_storage.url.size()); auto file_info = object_storage.getFileInfo(path); - return {StoredObject(remote_path, path, file_info.size)}; + return {StoredObject(remote_path, path, file_info->size)}; } std::vector MetadataStorageFromStaticFilesWebServer::listDirectory(const std::string & path) const diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h index 1b17cac994d..b720a9c91f3 100644 --- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h @@ -26,6 +26,8 @@ public: const std::string & getPath() const override; + MetadataStorageType getType() const override { return MetadataStorageType::StaticWeb; } + bool exists(const std::string & path) const override; bool isFile(const std::string & path) const override; diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index b2dd9e15c69..ff4216a83da 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -31,17 +31,21 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; } -std::vector WebObjectStorage::loadFiles(const String & uri_path, const std::unique_lock &) const +std::pair> 
+WebObjectStorage::loadFiles(const String & path, const std::unique_lock &) const { std::vector loaded_files; - LOG_TRACE(log, "Loading metadata for directory: {}", uri_path); + auto full_url = fs::path(url) / path; + LOG_TRACE(log, "Adding directory: {} ({})", path, full_url); + + FileDataPtr result; try { Poco::Net::HTTPBasicCredentials credentials{}; ReadWriteBufferFromHTTP metadata_buf( - Poco::URI(fs::path(uri_path) / ".index"), + Poco::URI(fs::path(full_url) / ".index"), Poco::Net::HTTPRequest::HTTP_GET, ReadWriteBufferFromHTTP::OutStreamCallback(), ConnectionTimeouts::getHTTPTimeouts( @@ -53,10 +57,6 @@ std::vector WebObjectStorage::loadFiles(const String & uri_path, const getContext()->getReadSettings()); String file_name; - FileData file_data{}; - - String dir_name = fs::path(uri_path.substr(url.size())) / ""; - LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding directory: {}", dir_name); while (!metadata_buf.eof()) { @@ -65,30 +65,43 @@ std::vector WebObjectStorage::loadFiles(const String & uri_path, const bool is_directory; readBoolText(is_directory, metadata_buf); + size_t size = 0; if (!is_directory) { assertChar('\t', metadata_buf); - readIntText(file_data.size, metadata_buf); + readIntText(size, metadata_buf); } assertChar('\n', metadata_buf); - file_data.type = is_directory ? FileType::Directory : FileType::File; - String file_path = fs::path(uri_path) / file_name; + FileDataPtr file_data = is_directory + ? FileData::createDirectoryInfo(false) + : FileData::createFileInfo(size); - file_path = file_path.substr(url.size()); - LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding file: {}, size: {}", file_path, file_data.size); + auto file_path = fs::path(path) / file_name; + const bool inserted = files.add(file_path, file_data).second; + if (!inserted) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Loading data for {} more than once", file_path); - files.emplace(std::make_pair(file_path, file_data)); + LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Adding file: {}, size: {}", file_path, size); loaded_files.emplace_back(file_path); } - files.emplace(std::make_pair(dir_name, FileData({ .type = FileType::Directory }))); + auto [it, inserted] = files.add(path, FileData::createDirectoryInfo(true)); + if (!inserted) + { + if (it->second->loaded_children) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Loading data for {} more than once", path); + + it->second->loaded_children = true; + } + + return std::pair(it->second, loaded_files); } catch (HTTPException & e) { /// 404 - no files if (e.getHTTPStatus() == Poco::Net::HTTPResponse::HTTP_NOT_FOUND) - return loaded_files; + return {}; e.addMessage("while loading disk metadata"); throw; @@ -98,8 +111,6 @@ std::vector WebObjectStorage::loadFiles(const String & uri_path, const e.addMessage("while loading disk metadata"); throw; } - - return loaded_files; } @@ -120,15 +131,15 @@ bool WebObjectStorage::exists(const StoredObject & object) const bool WebObjectStorage::exists(const std::string & path) const { LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Checking existence of path: {}", path); - return tryGetFileInfo(path) != std::nullopt; + return tryGetFileInfo(path) != nullptr; } -WebObjectStorage::FileData WebObjectStorage::getFileInfo(const String & path) const +WebObjectStorage::FileDataPtr WebObjectStorage::getFileInfo(const String & path) const { auto file_info = tryGetFileInfo(path); if (!file_info) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "No such file: {}", path); - return file_info.value(); + return file_info; } std::vector 
WebObjectStorage::listDirectory(const String & path) const @@ -144,88 +155,80 @@ std::vector WebObjectStorage::listDirectory(const String if (!file_info->loaded_children) { std::unique_lock unique_lock(metadata_mutex); - result = loadFiles(fs::path(url) / path, unique_lock); - file_info->loaded_children = true; + if (!file_info->loaded_children) + return loadFiles(path, unique_lock).second; } - else + std::shared_lock shared_lock(metadata_mutex); + for (const auto & [file_path, _] : files) { - std::shared_lock shared_lock(metadata_mutex); - for (const auto & [file_path, _] : files) - { - if (fs::path(parentPath(file_path)) / "" == fs::path(path) / "") - result.emplace_back(file_path); - } + if (fs::path(parentPath(file_path)) / "" == fs::path(path) / "") + result.emplace_back(file_path); } return result; } -std::optional WebObjectStorage::tryGetFileInfo(const String & path) const +WebObjectStorage::FileDataPtr WebObjectStorage::tryGetFileInfo(const String & path) const { std::shared_lock shared_lock(metadata_mutex); - if (files.find(path) == files.end()) + bool is_file = fs::path(path).has_extension(); + if (auto it = files.find(path, is_file); it != files.end()) + return it->second; + + if (is_file) { shared_lock.unlock(); - bool is_file = fs::path(path).has_extension(); - if (is_file) + const auto parent_path = fs::path(path).parent_path(); + auto parent_info = tryGetFileInfo(parent_path); + if (!parent_info) { - const auto parent_path = fs::path(path).parent_path(); - auto parent_info = tryGetFileInfo(parent_path); - if (!parent_info) - return std::nullopt; /// Even parent path does not exist. - - if (parent_info->loaded_children) - { - return std::nullopt; - } - else - { - std::unique_lock unique_lock(metadata_mutex); - loadFiles(fs::path(url) / parent_path, unique_lock); - parent_info->loaded_children = true; - } + return nullptr; } - else + + if (!parent_info->loaded_children) { std::unique_lock unique_lock(metadata_mutex); - loadFiles(fs::path(url) / path, unique_lock); + if (!parent_info->loaded_children) + loadFiles(parent_path, unique_lock); } shared_lock.lock(); + + if (auto jt = files.find(path, is_file); jt != files.end()) + return jt->second; + else + { + return nullptr; + } } - - if (files.empty()) - return std::nullopt; - - if (auto it = files.find(path); it != files.end()) - return it->second; - - /// `object_storage.files` contains files + directories only inside `metadata_path / uuid_3_digit / uuid /` - /// (specific table files only), but we need to be able to also tell if `exists()`, for example. - auto it = std::lower_bound( - files.begin(), files.end(), path, - [](const auto & file, const std::string & path_) { return file.first < path_; } - ); - - if (it == files.end()) - return std::nullopt; - - if (startsWith(it->first, path) - || (it != files.begin() && startsWith(std::prev(it)->first, path))) + else { + auto it = std::lower_bound( + files.begin(), files.end(), path, + [](const auto & file, const std::string & path_) { return file.first < path_; } + ); + if (it != files.end()) + { + if (startsWith(it->first, path) + || (it != files.begin() && startsWith(std::prev(it)->first, path))) + { + shared_lock.unlock(); + std::unique_lock unique_lock(metadata_mutex); + + /// Add this directory path not files cache to simplify further checks for this path. 
+ return files.add(path, FileData::createDirectoryInfo(false)).first->second; + } + } + shared_lock.unlock(); std::unique_lock unique_lock(metadata_mutex); - /// Add this directory path not files cache to simplify further checks for this path. - files.emplace(std::make_pair(path, FileData({.type = FileType::Directory}))); - - unique_lock.unlock(); - shared_lock.lock(); - - return FileData{ .type = FileType::Directory }; + if (auto jt = files.find(path, is_file); jt != files.end()) + return jt->second; + else + return loadFiles(path, unique_lock).first; } - return std::nullopt; } std::unique_ptr WebObjectStorage::readObjects( /// NOLINT diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index 69b91dfdfde..9688873f0c4 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -21,19 +21,13 @@ class WebObjectStorage : public IObjectStorage, WithContext public: WebObjectStorage(const String & url_, ContextPtr context_); - DataSourceDescription getDataSourceDescription() const override - { - return DataSourceDescription{ - .type = DataSourceType::WebServer, - .description = url, - .is_encrypted = false, - .is_cached = false, - }; - } - std::string getName() const override { return "WebObjectStorage"; } - std::string getCommonKeyPrefix() const override { return ""; } + ObjectStorageType getType() const override { return ObjectStorageType::Web; } + + std::string getCommonKeyPrefix() const override { return url; } + + std::string getDescription() const override { return url; } bool exists(const StoredObject & object) const override; @@ -110,23 +104,58 @@ protected: Directory }; + struct FileData; + using FileDataPtr = std::shared_ptr; + struct FileData { - FileType type{}; - size_t size = 0; - bool loaded_children = false; + FileData(FileType type_, size_t size_, bool loaded_children_ = false) + : type(type_), size(size_), loaded_children(loaded_children_) {} + + static FileDataPtr createFileInfo(size_t size_) + { + return std::make_shared(FileType::File, size_, false); + } + + static FileDataPtr createDirectoryInfo(bool loaded_childrent_) + { + return std::make_shared(FileType::Directory, 0, loaded_childrent_); + } + + FileType type; + size_t size; + std::atomic loaded_children; + }; + + struct Files : public std::map + { + auto find(const String & path, bool is_file) const + { + if (is_file) + return std::map::find(path); + else + return std::map::find(path.ends_with("/") ? path : path + '/'); + } + + auto add(const String & path, FileDataPtr data) + { + if (data->type == FileType::Directory) + return emplace(path.ends_with("/") ? 
path : path + '/', data); + else + return emplace(path, data); + } }; - using Files = std::map; /// file path -> file data mutable Files files; mutable std::shared_mutex metadata_mutex; - std::optional tryGetFileInfo(const String & path) const; + FileDataPtr tryGetFileInfo(const String & path) const; std::vector listDirectory(const String & path) const; - FileData getFileInfo(const String & path) const; + FileDataPtr getFileInfo(const String & path) const; private: - std::vector loadFiles(const String & path, const std::unique_lock &) const; + std::pair> + loadFiles(const String & path, const std::unique_lock &) const; const String url; Poco::Logger * log; diff --git a/src/Disks/getOrCreateDiskFromAST.cpp b/src/Disks/getOrCreateDiskFromAST.cpp index da318303f62..ab2fb5e7f8b 100644 --- a/src/Disks/getOrCreateDiskFromAST.cpp +++ b/src/Disks/getOrCreateDiskFromAST.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes namespace { - std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context) + std::string getOrCreateDiskFromDiskAST(const ASTFunction & function, ContextPtr context, bool attach) { const auto * function_args_expr = assert_cast(function.arguments.get()); const auto & function_args = function_args_expr->children; @@ -46,7 +46,8 @@ namespace } auto result_disk = context->getOrCreateDisk(disk_name, [&](const DisksMap & disks_map) -> DiskPtr { - auto disk = DiskFactory::instance().create(disk_name, *config, "", context, disks_map); + auto disk = DiskFactory::instance().create( + disk_name, *config, "", context, disks_map, /* attach */attach, /* custom_disk */true); /// Mark that disk can be used without storage policy. disk->markDiskAsCustom(); return disk; @@ -55,16 +56,16 @@ namespace if (!result_disk->isCustomDisk()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk with name `{}` already exist", disk_name); - if (!result_disk->isRemote()) + if (!attach && !result_disk->isRemote()) { - static constexpr auto custom_disks_base_dir_in_config = "custom_local_disks_base_directory"; - auto disk_path_expected_prefix = context->getConfigRef().getString(custom_disks_base_dir_in_config, ""); + static constexpr auto custom_local_disks_base_dir_in_config = "custom_local_disks_base_directory"; + auto disk_path_expected_prefix = context->getConfigRef().getString(custom_local_disks_base_dir_in_config, ""); if (disk_path_expected_prefix.empty()) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Base path for custom local disks must be defined in config file by `{}`", - custom_disks_base_dir_in_config); + custom_local_disks_base_dir_in_config); if (!pathStartsWith(result_disk->getPath(), disk_path_expected_prefix)) throw Exception( @@ -82,6 +83,7 @@ namespace struct Data { ContextPtr context; + bool attach; }; static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } @@ -90,7 +92,7 @@ namespace { if (isDiskFunction(ast)) { - auto disk_name = getOrCreateDiskFromDiskAST(*ast->as(), data.context); + auto disk_name = getOrCreateDiskFromDiskAST(*ast->as(), data.context, data.attach); ast = std::make_shared(disk_name); } } @@ -101,14 +103,14 @@ namespace } -std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context) +std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context, bool attach) { if (!isDiskFunction(disk_function)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected a disk function"); auto ast = disk_function->clone(); - FlattenDiskConfigurationVisitor::Data data{context}; + 
FlattenDiskConfigurationVisitor::Data data{context, attach}; FlattenDiskConfigurationVisitor{data}.visit(ast); auto disk_name = assert_cast(*ast).value.get(); diff --git a/src/Disks/getOrCreateDiskFromAST.h b/src/Disks/getOrCreateDiskFromAST.h index 0195f575278..61e1decbee9 100644 --- a/src/Disks/getOrCreateDiskFromAST.h +++ b/src/Disks/getOrCreateDiskFromAST.h @@ -13,6 +13,6 @@ class ASTFunction; * add it to DiskSelector by a unique (but always the same for given configuration) disk name * and return this name. */ -std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context); +std::string getOrCreateDiskFromDiskAST(const ASTPtr & disk_function, ContextPtr context, bool attach); } diff --git a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp index ca142479ff1..f3c9f511ef6 100644 --- a/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Functions/UserDefined/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -100,7 +100,8 @@ ExternalUserDefinedExecutableFunctionsLoader::ExternalUserDefinedExecutableFunct { setConfigSettings({"function", "name", "database", "uuid"}); enableAsyncLoading(false); - enablePeriodicUpdates(true); + if (getContext()->getApplicationType() == Context::ApplicationType::SERVER) + enablePeriodicUpdates(true); enableAlwaysLoadEverything(true); } diff --git a/src/Functions/visibleWidth.cpp b/src/Functions/visibleWidth.cpp index d4f6de404ff..9a3edd9fbec 100644 --- a/src/Functions/visibleWidth.cpp +++ b/src/Functions/visibleWidth.cpp @@ -6,22 +6,36 @@ #include #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + /** visibleWidth(x) - calculates the approximate width when outputting the value in a text form to the console. * In fact it calculate the number of Unicode code points. * It does not support zero width and full width characters, combining characters, etc. */ class FunctionVisibleWidth : public IFunction { +private: + UInt64 behavior; + public: static constexpr auto name = "visibleWidth"; - static FunctionPtr create(ContextPtr) + static FunctionPtr create(ContextPtr context) { - return std::make_shared(); + return std::make_shared(context); + } + + explicit FunctionVisibleWidth(ContextPtr context) + { + behavior = context->getSettingsRef().function_visible_width_behavior; } bool useDefaultImplementationForNulls() const override { return false; } @@ -56,7 +70,7 @@ public: auto res_col = ColumnUInt64::create(size); auto & res_data = assert_cast(*res_col).getData(); - /// For simplicity reasons, function is implemented by serializing into temporary buffer. + /// For simplicity reasons, the function is implemented by serializing into temporary buffer. 
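/// An illustration, not from this patch: the `function_visible_width_behavior` setting read in
/// the constructor selects between the two branches of the switch further down. Assuming
/// wcwidth-style semantics for UTF8::computeWidth, for the two-character string "你好":
///   behavior = 0: UTF8::countCodePoints(...) == 2 (number of code points, the old behaviour)
///   behavior = 1: UTF8::computeWidth(...)    == 4 (full-width characters occupy two cells)
/// while for plain ASCII such as "abc" both branches return 3.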
String tmp; FormatSettings format_settings; @@ -68,7 +82,17 @@ public: serialization->serializeText(*src.column, i, out, format_settings); } - res_data[i] = UTF8::countCodePoints(reinterpret_cast(tmp.data()), tmp.size()); + switch (behavior) + { + case 0: + res_data[i] = UTF8::countCodePoints(reinterpret_cast(tmp.data()), tmp.size()); + break; + case 1: + res_data[i] = UTF8::computeWidth(reinterpret_cast(tmp.data()), tmp.size()); + break; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported value {} of the `function_visible_width_behavior` setting", behavior); + } } return res_col; diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index a411b4bb4b6..94e68045575 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -224,6 +224,8 @@ public: throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Seek is not supported when reading from archive"); } + bool checkIfActuallySeekable() override { return false; } + off_t getPosition() override { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "getPosition not supported when reading from archive"); diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index fd7a09c4f20..8c9c37e4ae0 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes extern const int CANNOT_UNPACK_ARCHIVE; extern const int LOGICAL_ERROR; extern const int SEEK_POSITION_OUT_OF_BOUND; + extern const int CANNOT_SEEK_THROUGH_FILE; } using RawHandle = unzFile; @@ -285,23 +286,27 @@ public: if (new_pos > static_cast(file_info.uncompressed_size)) throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound"); - if (file_info.compression_method == MZ_COMPRESS_METHOD_STORE) - { - /// unzSeek64() works only for non-compressed files. - checkResult(unzSeek64(raw_handle, off, whence)); - return unzTell64(raw_handle); - } + /// unzSeek64() works only for non-compressed files. + /// + /// We used to have a fallback here, where we would: + /// * ignore() to "seek" forward, + /// * unzCloseCurrentFile(raw_handle) + unzOpenCurrentFile(raw_handle) to seek to the + /// beginning of the file. + /// But the close+open didn't work: after closing+reopening once, the second + /// unzCloseCurrentFile() was failing with MZ_CRC_ERROR in mz_zip_entry_read_close(). Maybe + /// it's a bug in minizip where some state was inadvertently left over after close+reopen. + /// Didn't investigate because re-reading the whole file should be avoided anyway. + if (file_info.compression_method != MZ_COMPRESS_METHOD_STORE) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Seek in compressed archive is not supported."); - /// As a last try we go slow way, we're going to simply ignore all data before the new position. - if (new_pos < current_pos) - { - checkResult(unzCloseCurrentFile(raw_handle)); - checkResult(unzOpenCurrentFile(raw_handle)); - current_pos = 0; - } + checkResult(unzSeek64(raw_handle, off, whence)); + return unzTell64(raw_handle); + } - ignore(new_pos - current_pos); - return new_pos; + bool checkIfActuallySeekable() override + { + /// The library doesn't support seeking in compressed files. 
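/// An illustrative sketch, not part of this patch: with seek() now refusing compressed entries,
/// a caller that may receive either kind of archive member is expected to probe first, e.g.
///
///     if (in->checkIfActuallySeekable())
///         in->seek(offset, SEEK_SET);   /// stored (uncompressed) entry, unzSeek64 works
///     else
///         in->ignore(offset);           /// compressed entry: skip forward by reading
///
/// (this assumes only forward movement is needed; going backwards in a compressed entry would
/// require reopening and re-reading it).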
+ return handle.getFileInfo().compression_method == MZ_COMPRESS_METHOD_STORE; } off_t getPosition() override diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index 88073a72d78..ecc0d64580b 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -5,81 +5,6 @@ namespace DB { -ConnectionTimeouts::ConnectionTimeouts( - Poco::Timespan connection_timeout_, - Poco::Timespan send_timeout_, - Poco::Timespan receive_timeout_) - : connection_timeout(connection_timeout_) - , send_timeout(send_timeout_) - , receive_timeout(receive_timeout_) - , tcp_keep_alive_timeout(0) - , http_keep_alive_timeout(0) - , secure_connection_timeout(connection_timeout) - , hedged_connection_timeout(receive_timeout_) - , receive_data_timeout(receive_timeout_) - , handshake_timeout(receive_timeout_) -{ -} - -ConnectionTimeouts::ConnectionTimeouts( - Poco::Timespan connection_timeout_, - Poco::Timespan send_timeout_, - Poco::Timespan receive_timeout_, - Poco::Timespan tcp_keep_alive_timeout_, - Poco::Timespan handshake_timeout_) - : connection_timeout(connection_timeout_) - , send_timeout(send_timeout_) - , receive_timeout(receive_timeout_) - , tcp_keep_alive_timeout(tcp_keep_alive_timeout_) - , http_keep_alive_timeout(0) - , secure_connection_timeout(connection_timeout) - , hedged_connection_timeout(receive_timeout_) - , receive_data_timeout(receive_timeout_) - , handshake_timeout(handshake_timeout_) -{ -} - -ConnectionTimeouts::ConnectionTimeouts( - Poco::Timespan connection_timeout_, - Poco::Timespan send_timeout_, - Poco::Timespan receive_timeout_, - Poco::Timespan tcp_keep_alive_timeout_, - Poco::Timespan http_keep_alive_timeout_, - Poco::Timespan handshake_timeout_) - : connection_timeout(connection_timeout_) - , send_timeout(send_timeout_) - , receive_timeout(receive_timeout_) - , tcp_keep_alive_timeout(tcp_keep_alive_timeout_) - , http_keep_alive_timeout(http_keep_alive_timeout_) - , secure_connection_timeout(connection_timeout) - , hedged_connection_timeout(receive_timeout_) - , receive_data_timeout(receive_timeout_) - , handshake_timeout(handshake_timeout_) -{ -} - -ConnectionTimeouts::ConnectionTimeouts( - Poco::Timespan connection_timeout_, - Poco::Timespan send_timeout_, - Poco::Timespan receive_timeout_, - Poco::Timespan tcp_keep_alive_timeout_, - Poco::Timespan http_keep_alive_timeout_, - Poco::Timespan secure_connection_timeout_, - Poco::Timespan hedged_connection_timeout_, - Poco::Timespan receive_data_timeout_, - Poco::Timespan handshake_timeout_) - : connection_timeout(connection_timeout_) - , send_timeout(send_timeout_) - , receive_timeout(receive_timeout_) - , tcp_keep_alive_timeout(tcp_keep_alive_timeout_) - , http_keep_alive_timeout(http_keep_alive_timeout_) - , secure_connection_timeout(secure_connection_timeout_) - , hedged_connection_timeout(hedged_connection_timeout_) - , receive_data_timeout(receive_data_timeout_) - , handshake_timeout(handshake_timeout_) -{ -} - Poco::Timespan ConnectionTimeouts::saturate(Poco::Timespan timespan, Poco::Timespan limit) { if (limit.totalMicroseconds() == 0) @@ -88,49 +13,36 @@ Poco::Timespan ConnectionTimeouts::saturate(Poco::Timespan timespan, Poco::Times return (timespan > limit) ? 
limit : timespan; } -ConnectionTimeouts ConnectionTimeouts::getSaturated(Poco::Timespan limit) const -{ - return ConnectionTimeouts(saturate(connection_timeout, limit), - saturate(send_timeout, limit), - saturate(receive_timeout, limit), - saturate(tcp_keep_alive_timeout, limit), - saturate(http_keep_alive_timeout, limit), - saturate(secure_connection_timeout, limit), - saturate(hedged_connection_timeout, limit), - saturate(receive_data_timeout, limit), - saturate(handshake_timeout, limit)); -} - /// Timeouts for the case when we have just single attempt to connect. ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithoutFailover(const Settings & settings) { - return ConnectionTimeouts(settings.connect_timeout, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout, settings.handshake_timeout_ms); + return ConnectionTimeouts() + .withConnectionTimeout(settings.connect_timeout) + .withSendTimeout(settings.send_timeout) + .withReceiveTimeout(settings.receive_timeout) + .withTcpKeepAliveTimeout(settings.tcp_keep_alive_timeout) + .withHandshakeTimeout(settings.handshake_timeout_ms) + .withHedgedConnectionTimeout(settings.hedged_connection_timeout_ms) + .withReceiveDataTimeout(settings.receive_data_timeout_ms); } /// Timeouts for the case when we will try many addresses in a loop. ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithFailover(const Settings & settings) { - return ConnectionTimeouts( - settings.connect_timeout_with_failover_ms, - settings.send_timeout, - settings.receive_timeout, - settings.tcp_keep_alive_timeout, - 0, - settings.connect_timeout_with_failover_secure_ms, - settings.hedged_connection_timeout_ms, - settings.receive_data_timeout_ms, - settings.handshake_timeout_ms); + return getTCPTimeoutsWithoutFailover(settings) + .withUnsecureConnectionTimeout(settings.connect_timeout_with_failover_ms) + .withSecureConnectionTimeout(settings.connect_timeout_with_failover_secure_ms); } ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings, Poco::Timespan http_keep_alive_timeout) { - return ConnectionTimeouts( - settings.http_connection_timeout, - settings.http_send_timeout, - settings.http_receive_timeout, - settings.tcp_keep_alive_timeout, - http_keep_alive_timeout, - settings.http_receive_timeout); + return ConnectionTimeouts() + .withConnectionTimeout(settings.http_connection_timeout) + .withSendTimeout(settings.http_send_timeout) + .withReceiveTimeout(settings.http_receive_timeout) + .withHttpKeepAliveTimeout(http_keep_alive_timeout) + .withTcpKeepAliveTimeout(settings.tcp_keep_alive_timeout) + .withHandshakeTimeout(settings.handshake_timeout_ms); } ConnectionTimeouts ConnectionTimeouts::getFetchPartHTTPTimeouts(const ServerSettings & server_settings, const Settings & user_settings) @@ -224,11 +136,9 @@ ConnectionTimeouts ConnectionTimeouts::getAdaptiveTimeouts(const String & method auto [send, recv] = SendReceiveTimeoutsForFirstAttempt::getSendReceiveTimeout(method, first_byte); - auto aggressive = *this; - aggressive.send_timeout = saturate(send, send_timeout); - aggressive.receive_timeout = saturate(recv, receive_timeout); - - return aggressive; + return ConnectionTimeouts(*this) + .withSendTimeout(saturate(send, send_timeout)) + .withReceiveTimeout(saturate(recv, receive_timeout)); } } diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index 42c4312d1d8..6967af08204 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -11,54 +11,40 @@ namespace DB struct Settings; 
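/// A minimal usage sketch, not part of this patch, of how the builder-style setters generated
/// below replace the removed positional constructors at a call site (the member names are real,
/// the values are invented):
///
///     auto timeouts = ConnectionTimeouts()               /// starts from the defaults below
///         .withConnectionTimeout(2)                      /// size_t overload, seconds
///         .withSendTimeout(Poco::Timespan(300, 0))       /// Poco::Timespan overload
///         .withReceiveTimeout(Poco::Timespan(300, 0));
///
/// Each setter assigns one member and returns *this, so a call site only spells out the timeouts
/// that differ from the defaults instead of choosing among several constructors whose positional
/// arguments were easy to mix up.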
+#define APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS(M) \ + M(connection_timeout, withUnsecureConnectionTimeout) \ + M(secure_connection_timeout, withSecureConnectionTimeout) \ + M(send_timeout, withSendTimeout) \ + M(receive_timeout, withReceiveTimeout) \ + M(tcp_keep_alive_timeout, withTcpKeepAliveTimeout) \ + M(http_keep_alive_timeout, withHttpKeepAliveTimeout) \ + M(hedged_connection_timeout, withHedgedConnectionTimeout) \ + M(receive_data_timeout, withReceiveDataTimeout) \ + M(handshake_timeout, withHandshakeTimeout) \ + M(sync_request_timeout, withSyncRequestTimeout) \ + + struct ConnectionTimeouts { - Poco::Timespan connection_timeout; - Poco::Timespan send_timeout; - Poco::Timespan receive_timeout; - Poco::Timespan tcp_keep_alive_timeout; - Poco::Timespan http_keep_alive_timeout; - Poco::Timespan secure_connection_timeout; + Poco::Timespan connection_timeout = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0); + Poco::Timespan secure_connection_timeout = Poco::Timespan(DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, 0); + + Poco::Timespan send_timeout = Poco::Timespan(DBMS_DEFAULT_SEND_TIMEOUT_SEC, 0); + Poco::Timespan receive_timeout = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0); + + Poco::Timespan tcp_keep_alive_timeout = Poco::Timespan(DEFAULT_TCP_KEEP_ALIVE_TIMEOUT, 0); + Poco::Timespan http_keep_alive_timeout = Poco::Timespan(DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, 0); /// Timeouts for HedgedConnections - Poco::Timespan hedged_connection_timeout; - Poco::Timespan receive_data_timeout; - + Poco::Timespan hedged_connection_timeout = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0); + Poco::Timespan receive_data_timeout = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0); /// Timeout for receiving HELLO packet - Poco::Timespan handshake_timeout; - + Poco::Timespan handshake_timeout = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0); /// Timeout for synchronous request-result protocol call (like Ping or TablesStatus) Poco::Timespan sync_request_timeout = Poco::Timespan(DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC, 0); ConnectionTimeouts() = default; - ConnectionTimeouts(Poco::Timespan connection_timeout_, - Poco::Timespan send_timeout_, - Poco::Timespan receive_timeout_); - - ConnectionTimeouts(Poco::Timespan connection_timeout_, - Poco::Timespan send_timeout_, - Poco::Timespan receive_timeout_, - Poco::Timespan tcp_keep_alive_timeout_, - Poco::Timespan handshake_timeout_); - - ConnectionTimeouts(Poco::Timespan connection_timeout_, - Poco::Timespan send_timeout_, - Poco::Timespan receive_timeout_, - Poco::Timespan tcp_keep_alive_timeout_, - Poco::Timespan http_keep_alive_timeout_, - Poco::Timespan handshake_timeout_); - - ConnectionTimeouts(Poco::Timespan connection_timeout_, - Poco::Timespan send_timeout_, - Poco::Timespan receive_timeout_, - Poco::Timespan tcp_keep_alive_timeout_, - Poco::Timespan http_keep_alive_timeout_, - Poco::Timespan secure_connection_timeout_, - Poco::Timespan hedged_connection_timeout_, - Poco::Timespan receive_data_timeout_, - Poco::Timespan handshake_timeout_); - static Poco::Timespan saturate(Poco::Timespan timespan, Poco::Timespan limit); ConnectionTimeouts getSaturated(Poco::Timespan limit) const; @@ -72,6 +58,57 @@ struct ConnectionTimeouts static ConnectionTimeouts getFetchPartHTTPTimeouts(const ServerSettings & server_settings, const Settings & user_settings); ConnectionTimeouts getAdaptiveTimeouts(const String & method, bool first_attempt, bool first_byte) const; + +#define DECLARE_BUILDER_FOR_MEMBER(member, setter_func) \ + ConnectionTimeouts & 
setter_func(size_t seconds); \ + ConnectionTimeouts & setter_func(Poco::Timespan span); \ + +APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS(DECLARE_BUILDER_FOR_MEMBER) +#undef DECLARE_BUILDER_FOR_MEMBER + + ConnectionTimeouts & withConnectionTimeout(size_t seconds); + ConnectionTimeouts & withConnectionTimeout(Poco::Timespan span); }; +#define DEFINE_BUILDER_FOR_MEMBER(member, setter_func) \ + inline ConnectionTimeouts & ConnectionTimeouts::setter_func(size_t seconds) \ + { \ + return setter_func(Poco::Timespan(seconds, 0)); \ + } \ + inline ConnectionTimeouts & ConnectionTimeouts::setter_func(Poco::Timespan span) \ + { \ + member = span; \ + return *this; \ + } \ + + APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS(DEFINE_BUILDER_FOR_MEMBER) + +#undef DEFINE_BUILDER_FOR_MEMBER + + +inline ConnectionTimeouts ConnectionTimeouts::getSaturated(Poco::Timespan limit) const +{ +#define SATURATE_MEMBER(member, setter_func) \ + .setter_func(saturate(member, limit)) + + return ConnectionTimeouts(*this) +APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS(SATURATE_MEMBER); + +#undef SATURATE_MEMBER +} + +#undef APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS + +inline ConnectionTimeouts & ConnectionTimeouts::withConnectionTimeout(size_t seconds) +{ + return withConnectionTimeout(Poco::Timespan(seconds, 0)); +} + +inline ConnectionTimeouts & ConnectionTimeouts::withConnectionTimeout(Poco::Timespan span) +{ + connection_timeout = span; + secure_connection_timeout = span; + return *this; +} + } diff --git a/src/IO/ReadBufferFromIStream.cpp b/src/IO/ReadBufferFromIStream.cpp index 3b3bdb5c564..52546f1703d 100644 --- a/src/IO/ReadBufferFromIStream.cpp +++ b/src/IO/ReadBufferFromIStream.cpp @@ -12,33 +12,46 @@ namespace ErrorCodes bool ReadBufferFromIStream::nextImpl() { - istr.read(internal_buffer.begin(), internal_buffer.size()); - size_t gcount = istr.gcount(); + if (eof) + return false; - if (!gcount) + size_t bytes_read = 0; + char * read_to = internal_buffer.begin(); + + /// It is necessary to read in a loop, since socket usually returns only data available at the moment. + while (bytes_read < internal_buffer.size()) { - if (istr.eof()) - return false; + try + { + const auto bytes_read_last_time = stream_buf.readFromDevice(read_to, internal_buffer.size() - bytes_read); + if (bytes_read_last_time <= 0) + { + eof = true; + break; + } - if (istr.fail()) - throw Exception(ErrorCodes::CANNOT_READ_FROM_ISTREAM, "Cannot read from istream at offset {}", count()); - - throw Exception(ErrorCodes::CANNOT_READ_FROM_ISTREAM, "Unexpected state of istream at offset {}", count()); + bytes_read += bytes_read_last_time; + read_to += bytes_read_last_time; + } + catch (...) + { + throw Exception( + ErrorCodes::CANNOT_READ_FROM_ISTREAM, + "Cannot read from istream at offset {}: {}", + count(), + getCurrentExceptionMessage(/*with_stacktrace=*/true)); + } } - else - working_buffer.resize(gcount); - return true; + if (bytes_read) + working_buffer.resize(bytes_read); + + return bytes_read; } ReadBufferFromIStream::ReadBufferFromIStream(std::istream & istr_, size_t size) - : BufferWithOwnMemory(size), istr(istr_) + : BufferWithOwnMemory(size), istr(istr_), stream_buf(dynamic_cast(*istr.rdbuf())) { - /// - badbit will be set if some exception will be throw from ios implementation - /// - failbit can be set when for instance read() reads less data, so we - /// cannot set it, since we are requesting to read more data, then the - /// buffer has now.
- istr.exceptions(std::ios::badbit); } } diff --git a/src/IO/ReadBufferFromIStream.h b/src/IO/ReadBufferFromIStream.h index 8c3f62728b5..50ed8c20c06 100644 --- a/src/IO/ReadBufferFromIStream.h +++ b/src/IO/ReadBufferFromIStream.h @@ -3,6 +3,8 @@ #include #include +#include + namespace DB { @@ -11,6 +13,8 @@ class ReadBufferFromIStream : public BufferWithOwnMemory { private: std::istream & istr; + Poco::Net::HTTPBasicStreamBuf & stream_buf; + bool eof = false; bool nextImpl() override; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 619fd40edc3..4529771e7b2 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index a8a31d82e56..c397689d6ad 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 297d73303c0..ca4c420288a 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -514,7 +514,7 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() } else if (initialization_error == InitializeError::RETRYABLE_ERROR) { - LOG_ERROR( + LOG_TRACE( log, "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. " "(Current backoff wait is {}/{} ms)", diff --git a/src/IO/Resource/StaticResourceManager.cpp b/src/IO/Resource/StaticResourceManager.cpp deleted file mode 100644 index a79e8148f94..00000000000 --- a/src/IO/Resource/StaticResourceManager.cpp +++ /dev/null @@ -1,138 +0,0 @@ -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int RESOURCE_ACCESS_DENIED; - extern const int RESOURCE_NOT_FOUND; - extern const int INVALID_SCHEDULER_NODE; -} - -StaticResourceManager::Resource::Resource( - const String & name, - EventQueue * event_queue, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix) -{ - // Initialize scheduler nodes - Poco::Util::AbstractConfiguration::Keys keys; - std::sort(keys.begin(), keys.end()); // for parents to appear before children - config.keys(config_prefix, keys); - for (const auto & key : keys) - { - if (!startsWith(key, "node")) - continue; - - // Validate path - String path = config.getString(config_prefix + "." + key + "[@path]", ""); - if (path.empty()) - throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Attribute 'path' must be specified in all nodes for resource '{}'", name); - if (path[0] != '/') - throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "path must start with '/' for resource '{}'", name); - - // Create node - String type = config.getString(config_prefix + "." + key + ".type", "fifo"); - SchedulerNodePtr node = SchedulerNodeFactory::instance().get(type, event_queue, config, config_prefix + "." 
+ key); - node->basename = path.substr(1); - - // Take ownership - if (auto [_, inserted] = nodes.emplace(path, node); !inserted) - throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Duplicate path '{}' for resource '{}'", path, name); - - // Attach created node to parent (if not root) - if (path != "/") - { - String parent_path = path.substr(0, path.rfind('/')); - if (parent_path.empty()) - parent_path = "/"; - if (auto parent = nodes.find(parent_path); parent != nodes.end()) - parent->second->attachChild(node); - else - throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Parent doesn't exist for path '{}' for resource '{}'", path, name); - } - } - - if (nodes.find("/") == nodes.end()) - throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "undefined root node path '/' for resource '{}'", name); -} - -StaticResourceManager::Classifier::Classifier(const StaticResourceManager & manager, const ClassifierDescription & cfg) -{ - for (auto [resource_name, path] : cfg) - { - if (auto resource_iter = manager.resources.find(resource_name); resource_iter != manager.resources.end()) - { - const Resource & resource = resource_iter->second; - if (auto node_iter = resource.nodes.find(path); node_iter != resource.nodes.end()) - { - if (auto * queue = dynamic_cast(node_iter->second.get())) - resources.emplace(resource_name, ResourceLink{.queue = queue}); - else - throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Unable to access non-queue node at path '{}' for resource '{}'", path, resource_name); - } - else - throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Path '{}' for resource '{}' does not exist", path, resource_name); - } - else - resources.emplace(resource_name, ResourceLink{}); // resource not configured - unlimited - } -} - -ResourceLink StaticResourceManager::Classifier::get(const String & resource_name) -{ - if (auto iter = resources.find(resource_name); iter != resources.end()) - return iter->second; - else - throw Exception(ErrorCodes::RESOURCE_ACCESS_DENIED, "Access denied to resource '{}'", resource_name); -} - -void StaticResourceManager::updateConfiguration(const Poco::Util::AbstractConfiguration & config) -{ - if (!resources.empty()) - return; // already initialized, configuration update is not supported - - Poco::Util::AbstractConfiguration::Keys keys; - const String config_prefix = "resources"; - config.keys(config_prefix, keys); - - // Create resource for every element under tag - for (const auto & key : keys) - { - auto [iter, _] = resources.emplace(std::piecewise_construct, - std::forward_as_tuple(key), - std::forward_as_tuple(key, scheduler.event_queue, config, config_prefix + "." + key)); - // Attach root of resource to scheduler - scheduler.attachChild(iter->second.nodes.find("/")->second); - } - - // Initialize classifiers - classifiers = std::make_unique(config); - - // Run scheduler thread - scheduler.start(); -} - -ClassifierPtr StaticResourceManager::acquire(const String & classifier_name) -{ - return std::make_shared(*this, classifiers->get(classifier_name)); -} - -void registerStaticResourceManager(ResourceManagerFactory & factory) -{ - factory.registerMethod("static"); -} - -} diff --git a/src/IO/Resource/StaticResourceManager.h b/src/IO/Resource/StaticResourceManager.h deleted file mode 100644 index 5ec6a35750b..00000000000 --- a/src/IO/Resource/StaticResourceManager.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - -namespace DB -{ - -/* - * Reads `` from config at startup and registers them in single `SchedulerRoot`. 
- * Do not support configuration updates, server restart is required. - */ -class StaticResourceManager : public IResourceManager -{ -public: - // Just initialization, any further updates are ignored for the sake of simplicity - // NOTE: manager must be initialized before any acquire() calls to avoid races - void updateConfiguration(const Poco::Util::AbstractConfiguration & config) override; - - ClassifierPtr acquire(const String & classifier_name) override; - - void forEachNode(VisitorFunc visitor) override - { - UNUSED(visitor); - } - -private: - struct Resource - { - std::unordered_map nodes; // by paths - - Resource( - const String & name, - EventQueue * event_queue, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix); - }; - - struct Classifier : public IClassifier - { - Classifier(const StaticResourceManager & manager, const ClassifierDescription & cfg); - ResourceLink get(const String & resource_name) override; - std::unordered_map resources; // accessible resources by names - }; - - SchedulerRoot scheduler; - std::unordered_map resources; // by name - std::unique_ptr classifiers; -}; - -} diff --git a/src/IO/Resource/tests/gtest_resource_manager_static.cpp b/src/IO/Resource/tests/gtest_resource_manager_static.cpp deleted file mode 100644 index 9c5e86e9ffc..00000000000 --- a/src/IO/Resource/tests/gtest_resource_manager_static.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include - -#include - -#include -#include - -using namespace DB; - -using ResourceTest = ResourceTestManager; -using TestGuard = ResourceTest::Guard; - -TEST(IOResourceStaticResourceManager, Smoke) -{ - ResourceTest t; - - t.update(R"CONFIG( - - - - inflight_limit10 - priority - - 1 - - - - /prio/A - /prio/B - - - )CONFIG"); - - ClassifierPtr ca = t.manager->acquire("A"); - ClassifierPtr cb = t.manager->acquire("B"); - - for (int i = 0; i < 10; i++) - { - ResourceGuard ga(ca->get("res1")); - ga.unlock(); - ResourceGuard gb(cb->get("res1")); - } -} - -TEST(IOResourceStaticResourceManager, Prioritization) -{ - std::optional last_priority; - auto check = [&] (Priority priority) - { - // Lock is not required here because this is called during request execution and we have max_requests = 1 - if (last_priority) - EXPECT_TRUE(priority >= *last_priority); // Should be true if every queue arrived at the same time at busy period start - last_priority = priority; - }; - - constexpr size_t threads_per_queue = 2; - int requests_per_thead = 100; - ResourceTest t(4 * threads_per_queue + 1); - - t.update(R"CONFIG( - - - - inflight_limit1 - priority - 1 - -1 - - - - - - - /prio/A - /prio/B - /prio/C - /prio/D - /prio/leader - - - )CONFIG"); - - for (String name : {"A", "B", "C", "D"}) - { - for (int thr = 0; thr < threads_per_queue; thr++) - { - t.threads.emplace_back([&, name] - { - ClassifierPtr c = t.manager->acquire(name); - ResourceLink link = c->get("res1"); - t.startBusyPeriod(link, 1, requests_per_thead); - for (int req = 0; req < requests_per_thead; req++) - { - TestGuard g(t, link, 1); - check(link.queue->info.priority); - } - }); - } - } - - ClassifierPtr c = t.manager->acquire("leader"); - ResourceLink link = c->get("res1"); - t.blockResource(link); -} diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index bf0679e774f..64259ce5a76 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -705,9 +705,9 @@ void Client::BuildHttpRequest(const Aws::AmazonWebServiceRequest& request, if (api_mode == ApiMode::GCS) { /// some GCS requests don't like S3 specific headers that the client 
sets + /// all "x-amz-*" headers have to be either converted or deleted + /// note that "amz-sdk-invocation-id" and "amz-sdk-request" are preserved httpRequest->DeleteHeader("x-amz-api-version"); - httpRequest->DeleteHeader("amz-sdk-invocation-id"); - httpRequest->DeleteHeader("amz-sdk-request"); } } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index f2acda80adf..946bd74dcb5 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -140,16 +140,22 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion() } } +ConnectionTimeouts getTimeoutsFromConfiguration(const PocoHTTPClientConfiguration & client_configuration) +{ + return ConnectionTimeouts() + .withConnectionTimeout(Poco::Timespan(client_configuration.connectTimeoutMs * 1000)) + .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withTcpKeepAliveTimeout(Poco::Timespan( + client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)) + .withHttpKeepAliveTimeout(Poco::Timespan( + client_configuration.http_keep_alive_timeout_ms * 1000)); /// flag indicating whether keep-alive is enabled is set to each session upon creation +} PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) : per_request_configuration(client_configuration.per_request_configuration) , error_report(client_configuration.error_report) - , timeouts(ConnectionTimeouts( - Poco::Timespan(client_configuration.connectTimeoutMs * 1000), /// connection timeout. - Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// send timeout. - Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// receive timeout. - Poco::Timespan(client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0), - Poco::Timespan(client_configuration.http_keep_alive_timeout_ms * 1000))) /// flag indicating whether keep-alive is enabled is set to each session upon creation + , timeouts(getTimeoutsFromConfiguration(client_configuration)) , remote_host_filter(client_configuration.remote_host_filter) , s3_max_redirects(client_configuration.s3_max_redirects) , s3_use_adaptive_timeouts(client_configuration.s3_use_adaptive_timeouts) @@ -497,12 +503,12 @@ void PocoHTTPClient::makeRequestInternalImpl( LOG_TEST(log, "Written {} bytes to request body", size); } + setTimeouts(*session, getTimeouts(method, first_attempt, /*first_byte*/ false)); + if (enable_s3_requests_logging) LOG_TEST(log, "Receiving response..."); auto & response_body_stream = session->receiveResponse(poco_response); - setTimeouts(*session, getTimeouts(method, first_attempt, /*first_byte*/ false)); - watch.stop(); addMetric(request, S3MetricType::Microseconds, watch.elapsedMicroseconds()); diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 25de61360fe..830377622ef 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -89,22 +89,30 @@ namespace BlobStorageLogWriterPtr blob_storage_log; const Poco::Logger * log; + /// Represents a task uploading a single part. + /// Keep this struct small because there can be thousands of parts. + /// For example, `UploadPartTask` must not contain a read buffer or `S3::UploadPartRequest` + /// because such read buffer can consume about 1MB memory and it could cause memory issues when the number of parts is big enough. 
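/// A rough back-of-the-envelope illustration (numbers invented, not from this patch): copying a
/// 50 GiB object with 16 MiB parts creates 3200 tasks; at ~1 MiB of buffered request data per
/// task that would be roughly 3 GiB held just for queued parts, whereas the slimmed-down task
/// below (three integers, a tag string and an exception pointer) stays in the low megabytes.
/// The request itself is now built lazily in makeUploadPartRequest(), right before the part is
/// actually sent.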
struct UploadPartTask { - std::unique_ptr req; - bool is_finished = false; + size_t part_number; + size_t part_offset; + size_t part_size; String tag; + bool is_finished = false; std::exception_ptr exception; }; + size_t num_parts; size_t normal_part_size; String multipart_upload_id; std::atomic multipart_upload_aborted = false; Strings part_tags; std::list TSA_GUARDED_BY(bg_tasks_mutex) bg_tasks; - int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; - int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + size_t num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + size_t num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0; + size_t num_finished_parts TSA_GUARDED_BY(bg_tasks_mutex) = 0; std::mutex bg_tasks_mutex; std::condition_variable bg_tasks_condvar; @@ -299,7 +307,7 @@ namespace throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be less than min_upload_part_size"); size_t part_size = min_upload_part_size; - size_t num_parts = (total_size + part_size - 1) / part_size; + num_parts = (total_size + part_size - 1) / part_size; if (num_parts > max_part_number) { @@ -338,7 +346,7 @@ namespace void uploadPart(size_t part_number, size_t part_offset, size_t part_size) { - LOG_TRACE(log, "Writing part. Bucket: {}, Key: {}, Upload_id: {}, Size: {}", dest_bucket, dest_key, multipart_upload_id, part_size); + LOG_TRACE(log, "Writing part #{} of {}. Bucket: {}, Key: {}, Upload_id: {}, Size: {}", part_number, num_parts, dest_bucket, dest_key, multipart_upload_id, part_size); if (!part_size) { @@ -353,6 +361,9 @@ namespace { std::lock_guard lock(bg_tasks_mutex); task = &bg_tasks.emplace_back(); + task->part_number = part_number; + task->part_offset = part_offset; + task->part_size = part_size; ++num_added_bg_tasks; } @@ -371,8 +382,6 @@ namespace try { - task->req = fillUploadPartRequest(part_number, part_offset, part_size); - schedule([this, task, task_finish_notify]() { try @@ -395,7 +404,9 @@ namespace else { UploadPartTask task; - task.req = fillUploadPartRequest(part_number, part_offset, part_size); + task.part_number = part_number; + task.part_offset = part_offset; + task.part_size = part_size; processUploadTask(task); part_tags.push_back(task.tag); } @@ -406,14 +417,18 @@ namespace if (multipart_upload_aborted) return; /// Already aborted. - auto tag = processUploadPartRequest(*task.req); + auto request = makeUploadPartRequest(task.part_number, task.part_offset, task.part_size); + auto tag = processUploadPartRequest(*request); std::lock_guard lock(bg_tasks_mutex); /// Protect bg_tasks from race task.tag = tag; - LOG_TRACE(log, "Writing part finished. Bucket: {}, Key: {}, Upload_id: {}, Etag: {}, Parts: {}", dest_bucket, dest_key, multipart_upload_id, task.tag, bg_tasks.size()); + ++num_finished_parts; + LOG_TRACE(log, "Finished writing part #{}. Bucket: {}, Key: {}, Upload_id: {}, Etag: {}, Finished parts: {} of {}", + task.part_number, dest_bucket, dest_key, multipart_upload_id, task.tag, num_finished_parts, num_parts); } - virtual std::unique_ptr fillUploadPartRequest(size_t part_number, size_t part_offset, size_t part_size) = 0; + /// These functions can be called from multiple threads, so a derived class needs to take care of synchronization.
+ virtual std::unique_ptr makeUploadPartRequest(size_t part_number, size_t part_offset, size_t part_size) const = 0; virtual String processUploadPartRequest(Aws::AmazonWebServiceRequest & request) = 0; void waitForAllBackgroundTasks() @@ -581,7 +596,7 @@ namespace void performMultipartUpload() { UploadHelper::performMultipartUpload(offset, size); } - std::unique_ptr fillUploadPartRequest(size_t part_number, size_t part_offset, size_t part_size) override + std::unique_ptr makeUploadPartRequest(size_t part_number, size_t part_offset, size_t part_size) const override { auto read_buffer = std::make_unique(create_read_buffer(), part_offset, part_size); @@ -795,7 +810,7 @@ namespace void performMultipartUploadCopy() { UploadHelper::performMultipartUpload(offset, size); } - std::unique_ptr fillUploadPartRequest(size_t part_number, size_t part_offset, size_t part_size) override + std::unique_ptr makeUploadPartRequest(size_t part_number, size_t part_offset, size_t part_size) const override { auto request = std::make_unique(); diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 8605fdc004a..f58380ed0b7 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index 8f22e44145a..fcadf34f021 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index 7210dc6fbbf..ae00bb2e9e2 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -1185,98 +1185,4 @@ String fillStringWithPattern(String pattern, int n) return data; } -TEST_F(WBS3Test, ReadBeyondLastOffset) { - const String remote_file = "ReadBeyondLastOffset"; - - const String key = "1234567812345678"; - const String data = fillStringWithPattern("0123456789", 10); - - ReadSettings disk_read_settings; - disk_read_settings.enable_filesystem_cache = false; - disk_read_settings.local_fs_buffer_size = 70; - disk_read_settings.remote_fs_buffer_size = FileEncryption::Header::kSize + 60; - - { - /// write encrypted file - - FileEncryption::Header header; - header.algorithm = FileEncryption::Algorithm::AES_128_CTR; - header.key_fingerprint = FileEncryption::calculateKeyFingerprint(key); - header.init_vector = FileEncryption::InitVector::random(); - - auto wbs3 = getWriteBuffer(remote_file); - getAsyncPolicy().setAutoExecute(true); - - WriteBufferFromEncryptedFile wb(10, std::move(wbs3), key, header); - wb.write(data.data(), data.size()); - wb.finalize(); - } - - auto reader = std::make_unique(1, 1); - std::unique_ptr encrypted_read_buffer; - - { - /// create encrypted file reader - - auto cache_log = std::shared_ptr(); - const StoredObjects objects = { StoredObject(remote_file, /* local_path */ "", data.size() + FileEncryption::Header::kSize) }; - auto async_read_counters = std::make_shared(); - auto prefetch_log = std::shared_ptr(); - - auto rb_creator = [this, disk_read_settings] (const std::string & path, size_t read_until_position) -> std::unique_ptr - { - S3Settings::RequestSettings request_settings; - return std::make_unique( - client, - bucket, - path, - "Latest", - request_settings, - disk_read_settings, - /* use_external_buffer */true, - /* offset */0, - read_until_position, - /* restricted_seek */true); - }; - - auto rb_remote_fs = std::make_unique( - 
std::move(rb_creator), - objects, - disk_read_settings, - cache_log, - true); - - auto rb_async = std::make_unique( - std::move(rb_remote_fs), *reader, disk_read_settings, async_read_counters, prefetch_log); - - /// read the header from the buffer - /// as a result AsynchronousBoundedReadBuffer consists some data from the file inside working buffer - FileEncryption::Header header; - header.read(*rb_async); - - ASSERT_EQ(rb_async->available(), disk_read_settings.remote_fs_buffer_size - FileEncryption::Header::kSize); - ASSERT_EQ(rb_async->getPosition(), FileEncryption::Header::kSize); - ASSERT_EQ(rb_async->getFileOffsetOfBufferEnd(), disk_read_settings.remote_fs_buffer_size); - - /// ReadBufferFromEncryptedFile is constructed over a ReadBuffer which was already in use. - /// The 'FileEncryption::Header' has been read from `rb_async`. - /// 'rb_async' will read the data from `rb_async` working buffer - encrypted_read_buffer = std::make_unique( - disk_read_settings.local_fs_buffer_size, std::move(rb_async), key, header); - } - - /// When header is read, file is read into working buffer till some position. Tn the test the file is read until remote_fs_buffer_size (124) position. - /// Set the right border before that position and make sure that encrypted_read_buffer does not have access to it - ASSERT_GT(disk_read_settings.remote_fs_buffer_size, 50); - encrypted_read_buffer->setReadUntilPosition(50); - - /// encrypted_read_buffer reads the data with buffer size `local_fs_buffer_size` - /// If the impl file has read the data beyond the ReadUntilPosition, encrypted_read_buffer does not read it - /// getFileOffsetOfBufferEnd should read data till `ReadUntilPosition` - String res; - readStringUntilEOF(res, *encrypted_read_buffer); - ASSERT_EQ(res, data.substr(0, 50)); - ASSERT_TRUE(encrypted_read_buffer->eof()); -} - #endif diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index e4856c33988..4171818d3e6 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -3054,6 +3054,8 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( size_t row_end, const AggregateColumnsConstData & aggregate_columns_data) const { + using namespace CurrentMetrics; + AggregatedDataWithoutKey & res = result.without_key; if (!res) { @@ -3062,11 +3064,26 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( res = place; } + ThreadPool thread_pool{AggregatorThreads, AggregatorThreadsActive, AggregatorThreadsScheduled, params.max_threads}; + for (size_t row = row_begin; row < row_end; ++row) { /// Adding Values for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge(res + offsets_of_aggregate_states[i], (*aggregate_columns_data[i])[row], result.aggregates_pool); + { + if (aggregate_functions[i]->isParallelizeMergePrepareNeeded()) + { + std::vector data_vec{res + offsets_of_aggregate_states[i], (*aggregate_columns_data[i])[row]}; + aggregate_functions[i]->parallelizeMergePrepare(data_vec, thread_pool); + } + + if (aggregate_functions[i]->isAbleToParallelizeMerge()) + aggregate_functions[i]->merge( + res + offsets_of_aggregate_states[i], (*aggregate_columns_data[i])[row], thread_pool, result.aggregates_pool); + else + aggregate_functions[i]->merge( + res + offsets_of_aggregate_states[i], (*aggregate_columns_data[i])[row], result.aggregates_pool); + } } } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 4edc9d4d4e5..f8a070a6fde 100644 --- 
a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -117,13 +117,13 @@ void SelectStreamFactory::createForShard( std::vector & local_plans, Shards & remote_shards, UInt32 shard_count, - bool parallel_replicas_enabled) + bool parallel_replicas_enabled, + AdditionalShardFilterGenerator shard_filter_generator) { auto it = objects_by_shard.find(shard_info.shard_num); if (it != objects_by_shard.end()) replaceMissedSubcolumnsByConstants(storage_snapshot->object_columns, it->second, query_ast); - auto emplace_local_stream = [&]() { local_plans.emplace_back(createLocalPlan( @@ -139,6 +139,7 @@ void SelectStreamFactory::createForShard( .shard_info = shard_info, .lazy = lazy, .local_delay = local_delay, + .shard_filter_generator = std::move(shard_filter_generator), }); }; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 511b0dfaadb..9993ea7028d 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -40,6 +40,7 @@ ASTPtr rewriteSelectQuery( ASTPtr table_function_ptr = nullptr); using ColumnsDescriptionByShardNum = std::unordered_map; +using AdditionalShardFilterGenerator = std::function; class SelectStreamFactory { @@ -59,6 +60,7 @@ public: /// (When there is a local replica with big delay). bool lazy = false; time_t local_delay = 0; + AdditionalShardFilterGenerator shard_filter_generator{}; }; using Shards = std::vector; @@ -78,7 +80,8 @@ public: std::vector & local_plans, Shards & remote_shards, UInt32 shard_count, - bool parallel_replicas_enabled); + bool parallel_replicas_enabled, + AdditionalShardFilterGenerator shard_filter_generator); const Block header; const ColumnsDescriptionByShardNum objects_by_shard; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 99453f224ff..b3a48e3e611 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -158,6 +158,13 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, new_settings.timeout_overflow_mode = settings.timeout_overflow_mode_leaf; } + /// in case of parallel replicas custom key use round robing load balancing + /// so custom key partitions will be spread over nodes in round-robin fashion + if (context->canUseParallelReplicasCustomKey(cluster) && !settings.load_balancing.changed) + { + new_settings.load_balancing = LoadBalancing::ROUND_ROBIN; + } + auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); return new_context; @@ -247,21 +254,6 @@ void executeQuery( visitor.visit(query_ast_for_shard); } - if (shard_filter_generator) - { - auto shard_filter = shard_filter_generator(shard_info.shard_num); - if (shard_filter) - { - auto & select_query = query_ast_for_shard->as(); - - auto where_expression = select_query.where(); - if (where_expression) - shard_filter = makeASTFunction("and", where_expression, shard_filter); - - select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(shard_filter)); - } - } - // decide for each shard if parallel reading from replicas should be enabled // according to settings and number of replicas declared per shard const auto & addresses = cluster->getShardsAddresses().at(i); @@ -276,7 +268,8 @@ void executeQuery( plans, remote_shards, static_cast(shards), - parallel_replicas_enabled); + parallel_replicas_enabled, + 
shard_filter_generator); } if (!remote_shards.empty()) @@ -329,6 +322,7 @@ void executeQuery( void executeQueryWithParallelReplicas( QueryPlan & query_plan, + const StorageID & main_table, SelectStreamFactory & stream_factory, const ASTPtr & query_ast, ContextPtr context, @@ -420,6 +414,7 @@ void executeQueryWithParallelReplicas( std::move(coordinator), stream_factory.header, stream_factory.processed_stage, + main_table, new_context, getThrottler(new_context), std::move(scalars), diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index dd48b6e10ad..b5ee4a11df6 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -65,11 +65,12 @@ void executeQuery( const std::string & sharding_key_column_name, const ClusterPtr & not_optimized_cluster, const DistributedSettings & distributed_settings, - AdditionalShardFilterGenerator shard_filter_generator = {}); + AdditionalShardFilterGenerator shard_filter_generator); void executeQueryWithParallelReplicas( QueryPlan & query_plan, + const StorageID & main_table, SelectStreamFactory & stream_factory, const ASTPtr & query_ast, ContextPtr context, diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 13a7618e461..7e89c794712 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -59,7 +59,7 @@ #include #include #include -#include +#include #include #include #include @@ -2551,17 +2551,24 @@ BackupsWorker & Context::getBackupsWorker() const const auto & config = getConfigRef(); const bool allow_concurrent_backups = config.getBool("backups.allow_concurrent_backups", true); const bool allow_concurrent_restores = config.getBool("backups.allow_concurrent_restores", true); + const bool test_inject_sleep = config.getBool("backups.test_inject_sleep", false); const auto & settings_ref = getSettingsRef(); UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads); UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads); - shared->backups_worker.emplace(getGlobalContext(), backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores); + shared->backups_worker.emplace(getGlobalContext(), backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores, test_inject_sleep); }); return *shared->backups_worker; } +void Context::waitAllBackupsAndRestores() const +{ + if (shared->backups_worker) + shared->backups_worker->waitAll(); +} + void Context::setProgressCallback(ProgressCallback callback) { @@ -5113,6 +5120,12 @@ bool Context::canUseParallelReplicasOnFollower() const return canUseTaskBasedParallelReplicas() && getClientInfo().collaborate_with_initiator; } +bool Context::canUseParallelReplicasCustomKey(const Cluster & cluster) const +{ + return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY + && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; +} + void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache) { prepared_sets_cache = cache; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index e91db7faa7b..65566876a80 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -819,6 +819,7 @@ public: #endif BackupsWorker & getBackupsWorker() const; + void 
waitAllBackupsAndRestores() const; /// I/O formats. InputFormatPtr getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, @@ -1246,6 +1247,7 @@ public: bool canUseTaskBasedParallelReplicas() const; bool canUseParallelReplicasOnInitiator() const; bool canUseParallelReplicasOnFollower() const; + bool canUseParallelReplicasCustomKey(const Cluster & cluster) const; enum class ParallelReplicasMode : uint8_t { diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 080878c7d86..46171c95cb0 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -27,7 +27,8 @@ ExternalDictionariesLoader::ExternalDictionariesLoader(ContextPtr global_context { setConfigSettings({"dictionary", "name", "database", "uuid"}); enableAsyncLoading(true); - enablePeriodicUpdates(true); + if (getContext()->getApplicationType() == Context::ApplicationType::SERVER) + enablePeriodicUpdates(true); } ExternalLoader::LoadablePtr ExternalDictionariesLoader::create( diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp index 4aa87346e80..60110916760 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp @@ -54,6 +54,7 @@ static StreamLocalLimits getLimitsForStorage(const Settings & settings, const Se limits.speed_limits.max_execution_rps = settings.max_execution_speed; limits.speed_limits.max_execution_bps = settings.max_execution_speed_bytes; limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; + limits.speed_limits.max_estimated_execution_time = settings.max_estimated_execution_time; return limits; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c5790b08a24..c0e9aeaae1d 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -589,9 +589,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( } } else if (auto * distributed = dynamic_cast(storage.get()); - distributed && canUseCustomKey(settings, *distributed->getCluster(), *context)) + distributed && context->canUseParallelReplicasCustomKey(*distributed->getCluster())) { - query_info.use_custom_key = true; context->setSetting("distributed_group_by_no_merge", 2); } } diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 752d3c4a12b..2b84b7655b3 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -254,6 +254,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q res = std::make_shared(*this, process_it); (*process_it)->setUserProcessList(&user_process_list); + (*process_it)->setProcessListEntry(res); user_process_list.queries.emplace(client_info.current_query_id, res->getQueryStatus()); queries_to_user.emplace(client_info.current_query_id, client_info.current_user); @@ -481,6 +482,22 @@ void QueryStatus::setUserProcessList(ProcessListForUser * user_process_list_) } +void QueryStatus::setProcessListEntry(std::weak_ptr process_list_entry_) +{ + /// Synchronization is not required here because this function is only called from ProcessList::insert() + /// when `ProcessList::mutex` is locked. 
+ if (!process_list_entry.expired() && !process_list_entry_.expired()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Two entries in the process list cannot both use the same query status"); + process_list_entry = process_list_entry_; +} + + +std::shared_ptr QueryStatus::getProcessListEntry() const +{ + return process_list_entry.lock(); +} + + ThrottlerPtr QueryStatus::getUserNetworkThrottler() { if (!user_process_list) @@ -541,6 +558,28 @@ CancellationCode ProcessList::sendCancelToQuery(const String & current_query_id, } +CancellationCode ProcessList::sendCancelToQuery(QueryStatusPtr elem, bool kill) +{ + /// Cancelling the query should be done without the lock. + /// So here we first set is_cancelling, and later reset it. + /// The ProcessListEntry cannot be destroy if is_cancelling is true. + { + auto lock = safeLock(); + elem->is_cancelling = true; + } + + SCOPE_EXIT({ + DENY_ALLOCATIONS_IN_SCOPE; + + auto lock = unsafeLock(); + elem->is_cancelling = false; + cancelled_cv.notify_all(); + }); + + return elem->cancelQuery(kill); +} + + void ProcessList::killAllQueries() { std::vector cancelled_processes; diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 7b0196db981..fe4ee934ed8 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -118,6 +118,11 @@ protected: /// Be careful using it. For example, queries field of ProcessListForUser could be modified concurrently. const ProcessListForUser * getUserProcessList() const { return user_process_list; } + /// Sets an entry in the ProcessList associated with this QueryStatus. + /// Be careful using it (this function contains no synchronization). + /// A weak pointer is used here because it's a ProcessListEntry which owns this QueryStatus, and not vice versa. + void setProcessListEntry(std::weak_ptr process_list_entry_); + mutable std::mutex executors_mutex; struct ExecutorHolder @@ -148,6 +153,8 @@ protected: ProcessListForUser * user_process_list = nullptr; + std::weak_ptr process_list_entry; + OvercommitTracker * global_overcommit_tracker = nullptr; /// This is used to control the maximum number of SELECT or INSERT queries. @@ -219,6 +226,9 @@ public: bool isKilled() const { return is_killed; } + /// Returns an entry in the ProcessList associated with this QueryStatus. The function can return nullptr. + std::shared_ptr getProcessListEntry() const; + bool isAllDataSent() const { return is_all_data_sent; } void setAllDataSent() { is_all_data_sent = true; } @@ -450,6 +460,7 @@ public: /// Try call cancel() for input and output streams of query with specified id and user CancellationCode sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill = false); + CancellationCode sendCancelToQuery(QueryStatusPtr elem, bool kill = false); void killAllQueries(); }; diff --git a/src/Interpreters/RewriteSumFunctionWithSumAndCountVisitor.cpp b/src/Interpreters/RewriteSumFunctionWithSumAndCountVisitor.cpp new file mode 100644 index 00000000000..b654d28d750 --- /dev/null +++ b/src/Interpreters/RewriteSumFunctionWithSumAndCountVisitor.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +void RewriteSumFunctionWithSumAndCountMatcher::visit(ASTPtr & ast, const Data & data) +{ + if (auto * func = ast->as()) + visit(*func, ast, data); +} + +/** Rewrites `sum(column +/- literal)` into two individual functions + * `sum(column)` and `literal * count(column)`. 
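+  * A quick sanity check of the identity: for n = 1, 2, 4, sum(n + 3) = 4 + 5 + 7 = 16,
+  * which equals sum(n) + 3 * count(n) = 7 + 3 * 3 = 16.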
+ * sum(column + literal) -> sum(column) + literal * count(column) + * sum(literal + column) -> literal * count(column) + sum(column) + * sum(column - literal) -> sum(column) - literal * count(column) + * sum(literal - column) -> literal * count(column) - sum(column) + */ +void RewriteSumFunctionWithSumAndCountMatcher::visit(const ASTFunction & function, ASTPtr & ast, const Data & data) +{ + static const std::unordered_set function_supported = { + "plus", + "minus" + }; + + if (!function.arguments || Poco::toLower(function.name) != "sum" || function.arguments->children.size() != 1) + return; + + const auto * func_plus_minus = function.arguments->children[0]->as(); + + if (!func_plus_minus || !function_supported.contains(Poco::toLower(func_plus_minus->name)) || func_plus_minus->arguments->children.size() != 2) + return; + + size_t column_id; + if (func_plus_minus->arguments->children[0]->as() && func_plus_minus->arguments->children[1]->as()) + column_id = 0; + else if (func_plus_minus->arguments->children[0]->as() && func_plus_minus->arguments->children[1]->as()) + column_id = 1; + else + return; + + size_t literal_id = 1 - column_id; + const auto * literal = func_plus_minus->arguments->children[literal_id]->as(); + if (!literal) + return; + + ///all the types listed are numbers and supported by 'plus' and 'minus'. + Field::Types::Which literal_type = literal->value.getType(); + if (literal_type != Field::Types::UInt64 && + literal_type != Field::Types::Int64 && + literal_type != Field::Types::UInt128 && + literal_type != Field::Types::Int128 && + literal_type != Field::Types::UInt256 && + literal_type != Field::Types::Int256 && + literal_type != Field::Types::Float64 && + literal_type != Field::Types::Decimal32 && + literal_type != Field::Types::Decimal64 && + literal_type != Field::Types::Decimal128 && + literal_type != Field::Types::Decimal256) + return; + + const auto * column = func_plus_minus->arguments->children[column_id]->as(); + if (!column) + return; + + auto pos = IdentifierSemantic::getMembership(*column); + if (!pos) + pos = IdentifierSemantic::chooseTableColumnMatch(*column, data.tables, true); + if (!pos) + return; + + if (*pos >= data.tables.size()) + return; + + auto column_type_name = data.tables[*pos].columns.tryGetByName(column->shortName()); + if (!column_type_name) + return; + + const auto column_type = column_type_name->type; + if (!column_type || !isNumber(*column_type)) + return; + + const String & column_name = column_type_name->name; + + if (column_id == 0) + { + const auto new_ast = makeASTFunction(func_plus_minus->name, + makeASTFunction("sum", + std::make_shared(column_name) + ), + makeASTFunction("multiply", + std::make_shared(* literal), + makeASTFunction("count", std::make_shared(column_name)) + ) + ); + if (!new_ast) + return; + else + ast = new_ast; + } + else if (column_id == 1) + { + const auto new_ast = makeASTFunction(func_plus_minus->name, + makeASTFunction("multiply", + std::make_shared(* literal), + makeASTFunction("count", std::make_shared(column_name)) + ), + makeASTFunction("sum", + std::make_shared(column_name) + ) + ); + if (!new_ast) + return; + else + ast = new_ast; + } +} + +} diff --git a/src/Interpreters/RewriteSumFunctionWithSumAndCountVisitor.h b/src/Interpreters/RewriteSumFunctionWithSumAndCountVisitor.h new file mode 100644 index 00000000000..68bc5e0ba31 --- /dev/null +++ b/src/Interpreters/RewriteSumFunctionWithSumAndCountVisitor.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTFunction; + +class 
RewriteSumFunctionWithSumAndCountMatcher +{ +public: + struct Data + { + const TablesWithColumns & tables; + }; + + static void visit(ASTPtr & ast, const Data & data); + static void visit(const ASTFunction &, ASTPtr & ast, const Data & data); + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } +}; + +using RewriteSumFunctionWithSumAndCountVisitor = InDepthNodeVisitor; +} diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index c06fe8f5c90..954368db312 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -660,7 +660,7 @@ ASTPtr SystemLog::getCreateTableQuery() if (endsWith(engine.name, "MergeTree")) { auto storage_settings = std::make_unique(getContext()->getMergeTreeSettings()); - storage_settings->loadFromQuery(*create->storage, getContext()); + storage_settings->loadFromQuery(*create->storage, getContext(), false); } return create; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b740852b808..de3b72f4801 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -641,6 +642,12 @@ void optimizeDateFilters(ASTSelectQuery * select_query, const std::vector & tables_with_columns) +{ + RewriteSumFunctionWithSumAndCountVisitor::Data data = {tables_with_columns}; + RewriteSumFunctionWithSumAndCountVisitor(data).visit(query); +} + void transformIfStringsIntoEnum(ASTPtr & query) { std::unordered_set function_names = {"if", "transform"}; @@ -744,6 +751,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, tables_with_columns, result.storage_snapshot->metadata, result.storage); } + /// Rewrite sum(column +/- literal) function with sum(column) +/- literal * count(column). + rewriteSumFunctionWithSumAndCount(query, tables_with_columns); + /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, etc. 
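    /// (For illustration only: a predicate like toYear(d) = 2023 can be rewritten into a plain range on d,
    ///  roughly d >= '2023-01-01' AND d < '2024-01-01', so indexes and partition pruning on d keep working.)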
optimizeDateFilters(select_query, tables_with_columns, context); diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 2e9ee0af724..1295a4d5a75 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -20,12 +20,6 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; } -bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context) -{ - return settings.max_parallel_replicas > 1 && context.getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY - && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1; -} - ASTPtr getCustomKeyFilterForParallelReplica( size_t replicas_count, size_t replica_num, @@ -34,7 +28,7 @@ ASTPtr getCustomKeyFilterForParallelReplica( const ColumnsDescription & columns, const ContextPtr & context) { - assert(replicas_count > 1); + chassert(replicas_count > 1); if (filter_type == ParallelReplicasCustomKeyFilterType::DEFAULT) { // first we do modulo with replica count diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h index c35f00f3dfd..1506c1992c0 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.h +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.h @@ -9,9 +9,6 @@ namespace DB { - -bool canUseCustomKey(const Settings & settings, const Cluster & cluster, const Context & context); - /// Get AST for filter created from custom_key /// replica_num is the number of the replica for which we are generating filter starting from 0 ASTPtr getCustomKeyFilterForParallelReplica( diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 594221fe050..eeb76e3bb9e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1451,6 +1451,7 @@ const char * ParserAlias::restricted_keywords[] = "ASOF", "BETWEEN", "CROSS", + "PASTE", "FINAL", "FORMAT", "FROM", diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 73a6bc5d810..5ee7bd3e0ba 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -257,8 +257,9 @@ public: && !query_node.isGroupByWithTotals() && !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); aggregation_with_rollup_or_cube_or_grouping_sets = query_node.isGroupByWithRollup() || query_node.isGroupByWithCube() || query_node.isGroupByWithGroupingSets(); - aggregation_should_produce_results_in_order_of_bucket_number = query_processing_info.getToStage() == QueryProcessingStage::WithMergeableState && - settings.distributed_aggregation_memory_efficient; + aggregation_should_produce_results_in_order_of_bucket_number + = query_processing_info.getToStage() == QueryProcessingStage::WithMergeableState + && (settings.distributed_aggregation_memory_efficient || settings.enable_memory_bound_merging_of_aggregation_results); query_has_array_join_in_join_tree = queryHasArrayJoinInJoinTree(query_tree); query_has_with_totals_in_any_subquery_in_join_tree = queryHasWithTotalsInAnySubqueryInJoinTree(query_tree); @@ -1390,7 +1391,7 @@ void Planner::buildPlanForQueryNode() } } - if (!settings.parallel_replicas_custom_key.value.empty()) + if (query_context->canUseTaskBasedParallelReplicas() || !settings.parallel_replicas_custom_key.value.empty()) { /// Check support for JOIN for parallel replicas with 
custom key if (planner_context->getTableExpressionNodeToData().size() > 1) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 394cd8a0669..552f25d7035 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -809,9 +809,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres else { if (auto * distributed = typeid_cast(storage.get()); - distributed && canUseCustomKey(settings, *distributed->getCluster(), *query_context)) + distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster())) { - table_expression_query_info.use_custom_key = true; planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2); } } diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 2df5915c72a..63f68ccf838 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -179,6 +179,7 @@ StreamLocalLimits getLimitsForStorage(const Settings & settings, const SelectQue limits.speed_limits.max_execution_rps = settings.max_execution_speed; limits.speed_limits.max_execution_bps = settings.max_execution_speed_bytes; limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; + limits.speed_limits.max_estimated_execution_time = settings.max_estimated_execution_time; return limits; } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 46d1c426ef4..5722c660071 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -987,7 +987,10 @@ private: LOG_TRACE((&Poco::Logger::get("AvroConfluentRowInputFormat")), "Fetching schema id = {} from url {}", id, url.toString()); /// One second for connect/send/receive. Just in case. 
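    /// (Each {1, 0} below is a Poco::Timespan of 1 second and 0 microseconds, covering the connection,
    ///  send and receive timeouts; the builder-style replacement names each of them explicitly.)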
- ConnectionTimeouts timeouts({1, 0}, {1, 0}, {1, 0}); + auto timeouts = ConnectionTimeouts() + .withConnectionTimeout(1) + .withSendTimeout(1) + .withReceiveTimeout(1); Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); request.setHost(url.getHost()); diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 496ec083cac..43dcca6d59b 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -155,9 +156,9 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream -> QueryPipelineBuilder { auto current_settings = my_context->getSettingsRef(); - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover( - current_settings).getSaturated( - current_settings.max_execution_time); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings) + .getSaturated(current_settings.max_execution_time); + std::vector try_results; try { @@ -231,8 +232,6 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact add_extremes = context->getSettingsRef().extremes; } - String query_string = formattedAST(shard.query); - scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_info.shard_num), std::make_shared(), "_shard_num"}}; @@ -254,29 +253,81 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact context->setSetting("cluster_for_parallel_replicas", cluster_name); } - auto remote_query_executor = std::make_shared( - shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage); - remote_query_executor->setLogger(log); - - if (context->canUseTaskBasedParallelReplicas()) + /// parallel replicas custom key case + if (shard.shard_filter_generator) { - // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard: - // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard. - // The coordinator will return query result from the shard. - // Only one coordinator per shard is necessary. Therefore using PoolMode::GET_ONE to establish only one connection per shard. 
- // Using PoolMode::GET_MANY for this mode will(can) lead to instantiation of several coordinators (depends on max_parallel_replicas setting) - // each will execute parallel reading from replicas, so the query result will be multiplied by the number of created coordinators - remote_query_executor->setPoolMode(PoolMode::GET_ONE); + for (size_t i = 0; i < shard.shard_info.per_replica_pools.size(); ++i) + { + auto query = shard.query->clone(); + auto & select_query = query->as(); + auto shard_filter = shard.shard_filter_generator(i + 1); + if (shard_filter) + { + auto where_expression = select_query.where(); + if (where_expression) + shard_filter = makeASTFunction("and", where_expression, shard_filter); + + select_query.setExpression(ASTSelectQuery::Expression::WHERE, std::move(shard_filter)); + } + + const String query_string = formattedAST(query); + + if (!priority_func_factory.has_value()) + priority_func_factory = GetPriorityForLoadBalancing(LoadBalancing::ROUND_ROBIN, randomSeed()); + + GetPriorityForLoadBalancing::Func priority_func + = priority_func_factory->getPriorityFunc(LoadBalancing::ROUND_ROBIN, 0, shard.shard_info.pool->getPoolSize()); + + auto remote_query_executor = std::make_shared( + shard.shard_info.pool, + query_string, + output_stream->header, + context, + throttler, + scalars, + external_tables, + stage, + std::nullopt, + priority_func); + remote_query_executor->setLogger(log); + remote_query_executor->setPoolMode(PoolMode::GET_ONE); + + if (!table_func_ptr) + remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table); + + pipes.emplace_back( + createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); + addConvertingActions(pipes.back(), output_stream->header); + } } else - remote_query_executor->setPoolMode(PoolMode::GET_MANY); + { + const String query_string = formattedAST(shard.query); - if (!table_func_ptr) - remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table); + auto remote_query_executor = std::make_shared( + shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage); + remote_query_executor->setLogger(log); - pipes.emplace_back( - createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); - addConvertingActions(pipes.back(), output_stream->header); + if (context->canUseTaskBasedParallelReplicas()) + { + // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard: + // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard. + // The coordinator will return query result from the shard. + // Only one coordinator per shard is necessary. Therefore using PoolMode::GET_ONE to establish only one connection per shard. + // Using PoolMode::GET_MANY for this mode will(can) lead to instantiation of several coordinators (depends on max_parallel_replicas setting) + // each will execute parallel reading from replicas, so the query result will be multiplied by the number of created coordinators + remote_query_executor->setPoolMode(PoolMode::GET_ONE); + } + else + remote_query_executor->setPoolMode(PoolMode::GET_MANY); + + if (!table_func_ptr) + remote_query_executor->setMainTable(shard.main_table ? 
shard.main_table : main_table); + + pipes.emplace_back( + createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); + addConvertingActions(pipes.back(), output_stream->header); + } } void ReadFromRemote::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) @@ -306,6 +357,7 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( ParallelReplicasReadingCoordinatorPtr coordinator_, Block header_, QueryProcessingStage::Enum stage_, + StorageID main_table_, ContextMutablePtr context_, ThrottlerPtr throttler_, Scalars scalars_, @@ -317,6 +369,7 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( , query_ast(query_ast_) , coordinator(std::move(coordinator_)) , stage(std::move(stage_)) + , main_table(std::move(main_table_)) , context(context_) , throttler(throttler_) , scalars(scalars_) diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index cbdc3e2f542..f6fd7b3a630 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -60,6 +60,7 @@ private: Poco::Logger * log; UInt32 shard_count; const String cluster_name; + std::optional priority_func_factory; void addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard); void addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard); @@ -75,6 +76,7 @@ public: ParallelReplicasReadingCoordinatorPtr coordinator_, Block header_, QueryProcessingStage::Enum stage_, + StorageID main_table_, ContextMutablePtr context_, ThrottlerPtr throttler_, Scalars scalars_, @@ -97,6 +99,7 @@ private: ASTPtr query_ast; ParallelReplicasReadingCoordinatorPtr coordinator; QueryProcessingStage::Enum stage; + StorageID main_table; ContextMutablePtr context; ThrottlerPtr throttler; Scalars scalars; diff --git a/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp new file mode 100644 index 00000000000..4a257bba922 --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp @@ -0,0 +1,43 @@ +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int QUERY_NOT_ALLOWED; +} + +ReadFromStreamLikeEngine::ReadFromStreamLikeEngine( + const Names & column_names_, + const StorageSnapshotPtr & storage_snapshot_, + std::shared_ptr storage_limits_, + ContextPtr context_) + : ISourceStep{DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}} + , WithContext{context_} + , storage_limits{std::move(storage_limits_)} +{ +} + +void ReadFromStreamLikeEngine::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + if (!getContext()->getSettingsRef().stream_like_engine_allow_direct_select) + throw Exception( + ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); + + auto pipe = makePipe(); + + /// Add storage limits. + for (const auto & processor : pipe.getProcessors()) + processor->setStorageLimits(storage_limits); + + /// Add to processors to get processor info through explain pipeline statement. 
+ for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); +} +} diff --git a/src/Processors/QueryPlan/ReadFromStreamLikeEngine.h b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.h new file mode 100644 index 00000000000..2416dca8004 --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +class ReadFromStreamLikeEngine : public ISourceStep, protected WithContext +{ +public: + ReadFromStreamLikeEngine( + const Names & column_names_, + const StorageSnapshotPtr & storage_snapshot_, + std::shared_ptr storage_limits_, + ContextPtr context_); + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & /*settings*/) final; + +protected: + virtual Pipe makePipe() = 0; + + std::shared_ptr storage_limits; +}; +} diff --git a/src/QueryPipeline/ExecutionSpeedLimits.cpp b/src/QueryPipeline/ExecutionSpeedLimits.cpp index 9ceaa4921c7..f8ae4c76d0f 100644 --- a/src/QueryPipeline/ExecutionSpeedLimits.cpp +++ b/src/QueryPipeline/ExecutionSpeedLimits.cpp @@ -78,17 +78,17 @@ void ExecutionSpeedLimits::throttle( read_bytes / elapsed_seconds, min_execution_bps); - /// If the predicted execution time is longer than `max_execution_time`. - if (max_execution_time != 0 && total_rows_to_read && read_rows) + /// If the predicted execution time is longer than `max_estimated_execution_time`. + if (max_estimated_execution_time != 0 && total_rows_to_read && read_rows) { double estimated_execution_time_seconds = elapsed_seconds * (static_cast(total_rows_to_read) / read_rows); - if (timeout_overflow_mode == OverflowMode::THROW && estimated_execution_time_seconds > max_execution_time.totalSeconds()) + if (timeout_overflow_mode == OverflowMode::THROW && estimated_execution_time_seconds > max_estimated_execution_time.totalSeconds()) throw Exception( ErrorCodes::TOO_SLOW, "Estimated query execution time ({} seconds) is too long. Maximum: {}. Estimated rows to process: {}", estimated_execution_time_seconds, - max_execution_time.totalSeconds(), + max_estimated_execution_time.totalSeconds(), total_rows_to_read); } diff --git a/src/QueryPipeline/ExecutionSpeedLimits.h b/src/QueryPipeline/ExecutionSpeedLimits.h index eed8b5c3248..0def483123a 100644 --- a/src/QueryPipeline/ExecutionSpeedLimits.h +++ b/src/QueryPipeline/ExecutionSpeedLimits.h @@ -21,6 +21,7 @@ public: size_t max_execution_bps = 0; Poco::Timespan max_execution_time = 0; + Poco::Timespan max_estimated_execution_time = 0; /// Verify that the speed is not too low after the specified time has elapsed. 
Poco::Timespan timeout_before_checking_execution_speed = 0; diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 0a8df5d1d34..136a3bb09c6 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -43,13 +43,24 @@ namespace ErrorCodes } RemoteQueryExecutor::RemoteQueryExecutor( - const String & query_, const Block & header_, ContextPtr context_, - const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::optional extension_) - : header(header_), query(query_), context(context_), scalars(scalars_) - , external_tables(external_tables_), stage(stage_) + const String & query_, + const Block & header_, + ContextPtr context_, + const Scalars & scalars_, + const Tables & external_tables_, + QueryProcessingStage::Enum stage_, + std::optional extension_, + GetPriorityForLoadBalancing::Func priority_func_) + : header(header_) + , query(query_) + , context(context_) + , scalars(scalars_) + , external_tables(external_tables_) + , stage(stage_) , extension(extension_) -{} + , priority_func(priority_func_) +{ +} RemoteQueryExecutor::RemoteQueryExecutor( Connection & connection, @@ -100,10 +111,16 @@ RemoteQueryExecutor::RemoteQueryExecutor( RemoteQueryExecutor::RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool, - const String & query_, const Block & header_, ContextPtr context_, - const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::optional extension_) - : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_) + const String & query_, + const Block & header_, + ContextPtr context_, + const ThrottlerPtr & throttler, + const Scalars & scalars_, + const Tables & external_tables_, + QueryProcessingStage::Enum stage_, + std::optional extension_, + GetPriorityForLoadBalancing::Func priority_func_) + : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_, priority_func_) { create_connections = [this, pool, throttler](AsyncCallback async_callback)->std::unique_ptr { @@ -117,7 +134,8 @@ RemoteQueryExecutor::RemoteQueryExecutor( if (main_table) table_to_check = std::make_shared(main_table.getQualifiedName()); - auto res = std::make_unique(pool, context, timeouts, throttler, pool_mode, table_to_check, std::move(async_callback)); + auto res = std::make_unique( + pool, context, timeouts, throttler, pool_mode, table_to_check, std::move(async_callback), priority_func); if (extension && extension->replica_info) res->setReplicaInfo(*extension->replica_info); return res; @@ -137,14 +155,16 @@ RemoteQueryExecutor::RemoteQueryExecutor( pool_mode, main_table.getQualifiedName(), std::move(async_callback), - skip_unavailable_endpoints); + skip_unavailable_endpoints, + priority_func); connection_entries.reserve(try_results.size()); for (auto & try_result : try_results) connection_entries.emplace_back(std::move(try_result.entry)); } else { - connection_entries = pool->getMany(timeouts, current_settings, pool_mode, std::move(async_callback), skip_unavailable_endpoints); + connection_entries = pool->getMany( + timeouts, current_settings, pool_mode, std::move(async_callback), skip_unavailable_endpoints, priority_func); } auto res = std::make_unique(std::move(connection_entries), current_settings, throttler); @@ -696,6 +716,7 @@ void RemoteQueryExecutor::sendExternalTables() limits.mode = LimitsMode::LIMITS_TOTAL; 
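    /// Note: max_execution_time bounds the time already spent, while max_estimated_execution_time
    /// (propagated just below) lets ExecutionSpeedLimits::throttle() reject a query (TOO_SLOW) whose
    /// projected total time, elapsed_seconds * total_rows_to_read / read_rows, already exceeds the limit.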
limits.speed_limits.max_execution_time = settings.max_execution_time; limits.timeout_overflow_mode = settings.timeout_overflow_mode; + limits.speed_limits.max_estimated_execution_time = settings.max_estimated_execution_time; for (size_t i = 0; i < count; ++i) { diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 49ea40bf4b6..5a8ccc2592b 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -50,6 +50,7 @@ public: std::shared_ptr task_iterator = nullptr; std::shared_ptr parallel_reading_coordinator = nullptr; std::optional replica_info = {}; + GetPriorityForLoadBalancing::Func priority_func; }; /// Takes already set connection. @@ -76,9 +77,15 @@ public: /// Takes a pool and gets one or several connections from it. RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool, - const String & query_, const Block & header_, ContextPtr context_, - const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); + const String & query_, + const Block & header_, + ContextPtr context_, + const ThrottlerPtr & throttler = nullptr, + const Scalars & scalars_ = Scalars(), + const Tables & external_tables_ = Tables(), + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, + std::optional extension_ = std::nullopt, + GetPriorityForLoadBalancing::Func priority_func = {}); ~RemoteQueryExecutor(); @@ -191,9 +198,14 @@ public: private: RemoteQueryExecutor( - const String & query_, const Block & header_, ContextPtr context_, - const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::optional extension_); + const String & query_, + const Block & header_, + ContextPtr context_, + const Scalars & scalars_, + const Tables & external_tables_, + QueryProcessingStage::Enum stage_, + std::optional extension_, + GetPriorityForLoadBalancing::Func priority_func = {}); Block header; Block totals; @@ -273,6 +285,8 @@ private: Poco::Logger * log = nullptr; + GetPriorityForLoadBalancing::Func priority_func; + /// Send all scalars to remote servers void sendScalars(); diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index 8072c09fe27..8098671a903 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -19,6 +19,13 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders() if (response.getChunkedTransferEncoding()) setChunked(); + else if (response.getContentLength() == Poco::Net::HTTPMessage::UNKNOWN_CONTENT_LENGTH) + { + /// In case there is no Content-Length we cannot use keep-alive, + /// since there is no way to know when the server send all the + /// data, so "Connection: close" should be sent. 
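+        /// (Background: HTTP/1.1 delimits a response body either by Content-Length or by chunked
+        /// Transfer-Encoding; with neither, the client only learns the body has ended when the
+        /// connection closes, so keep-alive cannot be used here.)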
+ response.setKeepAlive(false); + } if (add_cors_header) response.set("Access-Control-Allow-Origin", "*"); diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 12caad5eea1..8690ec9121e 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -23,6 +23,10 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe const auto & config = server.config(); unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + /// In order to make keep-alive works. + if (request.getVersion() == HTTPServerRequest::HTTP_1_1) + response.setChunkedTransferEncoding(true); + setResponseDefaultHeaders(response, keep_alive_timeout); response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 2eea619d654..ef776a3d313 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -1,7 +1,7 @@ #include +#include #include #include -#include #include #include #include @@ -13,9 +13,12 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include @@ -50,6 +53,76 @@ namespace static constexpr auto TMP_SUFFIX = ".tmp"; + +class ReadFromStorageFileLog final : public ReadFromStreamLikeEngine +{ +public: + ReadFromStorageFileLog( + const Names & column_names_, + StoragePtr storage_, + const StorageSnapshotPtr & storage_snapshot_, + SelectQueryInfo & query_info, + ContextPtr context_) + : ReadFromStreamLikeEngine{column_names_, storage_snapshot_, query_info.storage_limits, context_} + , column_names{column_names_} + , storage{storage_} + , storage_snapshot{storage_snapshot_} + { + } + + String getName() const override { return "ReadFromStorageFileLog"; } + +private: + Pipe makePipe() final + { + auto & file_log = storage->as(); + if (file_log.mv_attached) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageFileLog with attached materialized views"); + + std::lock_guard lock(file_log.file_infos_mutex); + if (file_log.running_streams) + throw Exception(ErrorCodes::CANNOT_SELECT, "Another select query is running on this table, need to wait it finish."); + + file_log.updateFileInfos(); + + /// No files to parse + if (file_log.file_infos.file_names.empty()) + { + LOG_WARNING(file_log.log, "There is a idle table named {}, no files need to parse.", getName()); + return Pipe{}; + } + + auto modified_context = Context::createCopy(getContext()); + + auto max_streams_number = std::min(file_log.filelog_settings->max_threads, file_log.file_infos.file_names.size()); + + /// Each stream responsible for closing it's files and store meta + file_log.openFilesAndSetPos(); + + Pipes pipes; + pipes.reserve(max_streams_number); + for (size_t stream_number = 0; stream_number < max_streams_number; ++stream_number) + { + pipes.emplace_back(std::make_shared( + file_log, + storage_snapshot, + modified_context, + column_names, + file_log.getMaxBlockSize(), + file_log.getPollTimeoutMillisecond(), + stream_number, + max_streams_number, + file_log.filelog_settings->handle_error_mode)); + } + + return Pipe::unitePipes(std::move(pipes)); + } + + const Names column_names; + StoragePtr storage; + StorageSnapshotPtr storage_snapshot; +}; + StorageFileLog::StorageFileLog( const StorageID & table_id_, ContextPtr context_, @@ -296,62 +369,19 @@ UInt64 StorageFileLog::getInode(const String 
& file_name) return file_stat.st_ino; } -Pipe StorageFileLog::read( +void StorageFileLog::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /* query_info */, - ContextPtr local_context, + SelectQueryInfo & query_info, + ContextPtr query_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, size_t /* num_streams */) + { - /// If there are MVs depended on this table, we just forbid reading - if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, - "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); - - if (mv_attached) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageFileLog with attached materialized views"); - - std::lock_guard lock(file_infos_mutex); - if (running_streams) - { - throw Exception(ErrorCodes::CANNOT_SELECT, "Another select query is running on this table, need to wait it finish."); - } - - updateFileInfos(); - - /// No files to parse - if (file_infos.file_names.empty()) - { - LOG_WARNING(log, "There is a idle table named {}, no files need to parse.", getName()); - return Pipe{}; - } - - auto modified_context = Context::createCopy(local_context); - - auto max_streams_number = std::min(filelog_settings->max_threads, file_infos.file_names.size()); - - /// Each stream responsible for closing it's files and store meta - openFilesAndSetPos(); - - Pipes pipes; - pipes.reserve(max_streams_number); - for (size_t stream_number = 0; stream_number < max_streams_number; ++stream_number) - { - pipes.emplace_back(std::make_shared( - *this, - storage_snapshot, - modified_context, - column_names, - getMaxBlockSize(), - getPollTimeoutMillisecond(), - stream_number, - max_streams_number, - filelog_settings->handle_error_mode)); - } - - return Pipe::unitePipes(std::move(pipes)); + query_plan.addStep( + std::make_unique(column_names, shared_from_this(), storage_snapshot, query_info, std::move(query_context))); } void StorageFileLog::increaseStreams() diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 3cb6ac1fbbf..33442d8b33b 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -49,7 +49,8 @@ public: void startup() override; void shutdown(bool is_drop) override; - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -133,6 +134,8 @@ public: const auto & getFileLogSettings() const { return filelog_settings; } private: + friend class ReadFromStorageFileLog; + std::unique_ptr filelog_settings; const String path; diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index ca84719b793..4df05d47003 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -2,7 +2,7 @@ #if USE_HDFS #include -#include +#include #include #include #include diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index 6360bb2a3d5..173dd899ada 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index d8386839a6f..522a381700d 100644 --- 
a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -178,53 +179,31 @@ struct StorageKafkaInterceptors } }; -class ReadFromStorageKafkaStep final : public ISourceStep +class ReadFromStorageKafka final : public ReadFromStreamLikeEngine { public: - ReadFromStorageKafkaStep( + ReadFromStorageKafka( const Names & column_names_, StoragePtr storage_, const StorageSnapshotPtr & storage_snapshot_, SelectQueryInfo & query_info, ContextPtr context_) - : ISourceStep{DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}} + : ReadFromStreamLikeEngine{column_names_, storage_snapshot_, query_info.storage_limits, context_} , column_names{column_names_} , storage{storage_} , storage_snapshot{storage_snapshot_} - , storage_limits{query_info.storage_limits} - , context{context_} { } String getName() const override { return "ReadFromStorageKafka"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override - { - auto pipe = makePipe(); - - /// Add storage limits. - for (const auto & processor : pipe.getProcessors()) - processor->setStorageLimits(storage_limits); - - /// Add to processors to get processor info through explain pipeline statement. - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); - } - private: - Pipe makePipe() + Pipe makePipe() final { auto & kafka_storage = storage->as(); if (kafka_storage.shutdown_called) throw Exception(ErrorCodes::ABORTED, "Table is detached"); - if (!context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception( - ErrorCodes::QUERY_NOT_ALLOWED, - "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); - if (kafka_storage.mv_attached) throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageKafka with attached materialized views"); @@ -233,7 +212,7 @@ private: /// Always use all consumers at once, otherwise SELECT may not read messages from all partitions. 
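        /// (Reason: Kafka assigns each topic partition to exactly one consumer in a consumer group,
        ///  so claiming only a subset of the consumers would silently skip the partitions owned by the rest.)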
Pipes pipes; pipes.reserve(kafka_storage.num_consumers); - auto modified_context = Context::createCopy(context); + auto modified_context = Context::createCopy(getContext()); modified_context->applySettingsChanges(kafka_storage.settings_adjustments); // Claim as many consumers as requested, but don't block @@ -255,13 +234,10 @@ private: LOG_DEBUG(kafka_storage.log, "Starting reading {} streams", pipes.size()); return Pipe::unitePipes(std::move(pipes)); } - ActionsDAGPtr buildFilterDAG(); const Names column_names; StoragePtr storage; StorageSnapshotPtr storage_snapshot; - std::shared_ptr storage_limits; - ContextPtr context; }; namespace @@ -447,7 +423,7 @@ void StorageKafka::read( size_t /* max_block_size */, size_t /* num_streams */) { - query_plan.addStep(std::make_unique( + query_plan.addStep(std::make_unique( column_names, shared_from_this(), storage_snapshot, query_info, std::move(query_context))); } diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 541fd32429d..d370d6018f7 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -20,7 +20,7 @@ namespace DB { class StorageSystemKafkaConsumers; -class ReadFromStorageKafkaStep; +class ReadFromStorageKafka; struct StorageKafkaInterceptors; @@ -88,7 +88,7 @@ public: SafeConsumers getSafeConsumers() { return {shared_from_this(), std::unique_lock(mutex), consumers}; } private: - friend class ReadFromStorageKafkaStep; + friend class ReadFromStorageKafka; // Configuration and state std::unique_ptr kafka_settings; diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 6c1377505d5..f4b92ff8c57 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -195,7 +195,7 @@ std::string DataPartStorageOnDiskBase::getDiskName() const std::string DataPartStorageOnDiskBase::getDiskType() const { - return toString(volume->getDisk()->getDataSourceDescription().type); + return volume->getDisk()->getDataSourceDescription().toString(); } bool DataPartStorageOnDiskBase::isStoredOnRemoteDisk() const diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 95b7c17ae78..a59f2a356e8 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -474,7 +474,7 @@ std::pair Fetcher::fetchSelected if (disk) { - LOG_TRACE(log, "Will fetch to disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Will fetch to disk {} with type {}", disk->getName(), disk->getDataSourceDescription().toString()); UInt64 revision = disk->getRevision(); if (revision) uri.addQueryParameter("disk_revision", toString(revision)); @@ -489,18 +489,18 @@ std::pair Fetcher::fetchSelected Disks disks = data.getDisks(); for (const auto & data_disk : disks) { - LOG_TRACE(log, "Checking disk {} with type {}", data_disk->getName(), toString(data_disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Checking disk {} with type {}", data_disk->getName(), data_disk->getDataSourceDescription().toString()); if (data_disk->supportZeroCopyReplication()) { - LOG_TRACE(log, "Disk {} (with type {}) supports zero-copy replication", data_disk->getName(), toString(data_disk->getDataSourceDescription().type)); - capability.push_back(toString(data_disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Disk {} (with type {}) supports zero-copy 
replication", data_disk->getName(), data_disk->getDataSourceDescription().toString()); + capability.push_back(data_disk->getDataSourceDescription().toString()); } } } else if (disk->supportZeroCopyReplication()) { - LOG_TRACE(log, "Trying to fetch with zero copy replication, provided disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); - capability.push_back(toString(disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Trying to fetch with zero copy replication, provided disk {} with type {}", disk->getName(), disk->getDataSourceDescription().toString()); + capability.push_back(disk->getDataSourceDescription().toString()); } } @@ -544,7 +544,7 @@ std::pair Fetcher::fetchSelected { for (const auto & disk_candidate : data.getDisks()) { - if (toString(disk_candidate->getDataSourceDescription().type) == remote_fs_metadata) + if (disk_candidate->getDataSourceDescription().toString() == remote_fs_metadata) { preffered_disk = disk_candidate; break; @@ -601,11 +601,11 @@ std::pair Fetcher::fetchSelected if (!disk) { disk = reservation->getDisk(); - LOG_TRACE(log, "Disk for fetch is not provided, getting disk from reservation {} with type '{}'", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Disk for fetch is not provided, getting disk from reservation {} with type '{}'", disk->getName(), disk->getDataSourceDescription().toString()); } else { - LOG_TEST(log, "Disk for fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TEST(log, "Disk for fetch is disk {} with type {}", disk->getName(), disk->getDataSourceDescription().toString()); } UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); @@ -888,7 +888,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( readStringBinary(part_id, in); if (!disk->supportZeroCopyReplication() || !disk->checkUniqueId(part_id)) - throw Exception(ErrorCodes::ZERO_COPY_REPLICATION_ERROR, "Part {} unique id {} doesn't exist on {} (with type {}).", part_name, part_id, disk->getName(), toString(disk->getDataSourceDescription().type)); + throw Exception(ErrorCodes::ZERO_COPY_REPLICATION_ERROR, "Part {} unique id {} doesn't exist on {} (with type {}).", part_name, part_id, disk->getName(), disk->getDataSourceDescription().toString()); LOG_DEBUG(log, "Downloading part {} unique id {} metadata onto disk {}.", part_name, part_id, disk->getName()); zero_copy_temporary_lock_holder = data.lockSharedDataTemporary(part_name, part_id, disk); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e3de926570b..61332a4ff38 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6299,7 +6299,7 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( "Trying to reserve {} on the selected disk: {} (with type {})", ReadableSize(expected_size), selected_disk->getName(), - toString(selected_disk->getDataSourceDescription().type)); + selected_disk->getDataSourceDescription().toString()); reservation = selected_disk->reserve(expected_size); } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 84e3c10eace..cce7e56dda9 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1283,6 +1283,81 @@ inline void combineFiltersImpl(UInt8 * first_begin, const UInt8 * first_end, con } ) +/* The BMI2 intrinsic, 
_pdep_u64 (unsigned __int64 a, unsigned __int64 mask), works + * by copying contiguous low-order bits from unsigned 64-bit integer a to destination + * at the corresponding bit locations specified by mask. To implement the column + * combination with the intrinsic, 8 contiguous bytes would be loaded from second_begin + * as a UInt64 and act the first operand, meanwhile the mask should be constructed from + * first_begin so that the bytes to be replaced (non-zero elements) are mapped to 0xFF + * at the exact bit locations and 0x00 otherwise. + * + * The construction of mask employs the SSE intrinsic, mm_cmpeq_epi8(__m128i a, __m128i + * b), which compares packed 8-bit integers in first_begin and packed 0s and outputs + * 0xFF for equality and 0x00 for inequality. The result's negation then creates the + * desired bit masks for _pdep_u64. + * + * The below example visualizes how this optimization applies to the combination of + * two quadwords from first_begin and second_begin. + * + * Addr high low + * <---------------------------------------- + * first_begin............................0x00 0x11 0x12 0x00 0x00 0x13 0x14 0x15 + * | mm_cmpeq_epi8(src, 0) | | | | | | | | + * v v v v v v v v v + * inv_mask..............................0xFF 0x00 0x00 0xFF 0xFF 0x00 0x00 0x00 + * | (negation) | | | | | | | | + * v v v v v v v v v + * mask-------------------------+......0x00 0xFF 0xFF 0x00 0x00 0xFF 0xFF 0xFF + * | | | | | | + * v v v v v v + * dst = pdep_u64(second_begin, mask)..0x00 0x05 0x04 0x00 0x00 0x03 0x02 0x01 + * ^ ^ ^ ^ ^ ^ + * | | | | | | + * | | +---------+ | | | + * +------------------+ +---------+ | | | | + * | | | | | | + * second_begin...........................0x00 0x00 0x00 0x05 0x04 0x03 0x02 0x01 + * + * References: + * 1. https://www.felixcloutier.com/x86/pdep + * 2. https://www.felixcloutier.com/x86/pcmpeqb:pcmpeqw:pcmpeqd + */ +DECLARE_AVX2_SPECIFIC_CODE( +inline void combineFiltersImpl(UInt8 * first_begin, const UInt8 * first_end, const UInt8 * second_begin) +{ + constexpr size_t XMM_VEC_SIZE_IN_BYTES = 16; + const __m128i zero16 = _mm_setzero_si128(); + + while (first_begin + XMM_VEC_SIZE_IN_BYTES <= first_end) + { + __m128i src = _mm_loadu_si128(reinterpret_cast<__m128i *>(first_begin)); + __m128i inv_mask = _mm_cmpeq_epi8(src, zero16); + + UInt64 masks[] = { + ~static_cast(_mm_extract_epi64(inv_mask, 0)), + ~static_cast(_mm_extract_epi64(inv_mask, 1)), + }; + + for (const auto & mask: masks) + { + UInt64 dst = _pdep_u64(unalignedLoad(second_begin), mask); + unalignedStore(first_begin, dst); + + first_begin += sizeof(UInt64); + second_begin += std::popcount(mask) / 8; + } + } + + for (/* empty */; first_begin < first_end; ++first_begin) + { + if (*first_begin) + { + *first_begin = *second_begin++; + } + } +} +) + /// Second filter size must be equal to number of 1s in the first filter. /// The result has size equal to first filter size and contains 1s only where both filters contain 1s. 
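/// Example: first = [1, 0, 1, 1, 0], second = [1, 0, 1] -> result = [1, 0, 0, 1, 0]
/// (the three 1-positions of `first` take the values of `second` in order).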
static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) @@ -1330,6 +1405,10 @@ static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) { TargetSpecific::AVX512VBMI2::combineFiltersImpl(first_data.begin(), first_data.end(), second_data); } + else if (isArchSupported(TargetArch::AVX2)) + { + TargetSpecific::AVX2::combineFiltersImpl(first_data.begin(), first_data.end(), second_data); + } else #endif { diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index e0015cdeb40..654f6cf397f 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -44,7 +44,7 @@ void MergeTreeSettings::loadFromConfig(const String & config_elem, const Poco::U } } -void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr context) +void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr context, bool is_attach) { if (storage_def.settings) { @@ -64,7 +64,7 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr conte auto ast = dynamic_cast(custom.getImpl()).ast; if (ast && isDiskFunction(ast)) { - auto disk_name = getOrCreateDiskFromDiskAST(ast, context); + auto disk_name = getOrCreateDiskFromDiskAST(ast, context, is_attach); LOG_TRACE(&Poco::Logger::get("MergeTreeSettings"), "Created custom disk {}", disk_name); value = disk_name; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index eb6c14d7754..732389927ab 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -255,7 +255,7 @@ struct MergeTreeSettings : public BaseSettings, public void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); /// NOTE: will rewrite the AST to add immutable settings. 
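    /// The extra is_attach flag is forwarded to getOrCreateDiskFromDiskAST(), so a custom
    /// `disk = disk(...)` definition can be treated differently when a table is attached
    /// (registerStorageMergeTree passes args.attach) than when it is freshly created.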
- void loadFromQuery(ASTStorage & storage_def, ContextPtr context); + void loadFromQuery(ASTStorage & storage_def, ContextPtr context, bool is_attach); /// We check settings after storage creation static bool isReadonlySetting(const String & name) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9a5af77d57c..8e646e48f16 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -608,7 +608,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.column_ttls_by_name[name] = new_ttl_entry; } - storage_settings->loadFromQuery(*args.storage_def, context); + storage_settings->loadFromQuery(*args.storage_def, context, args.attach); // updates the default storage_settings with settings specified via SETTINGS arg in a query if (args.storage_def->settings) diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index c3be07b6572..662a5c0ef5a 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -176,8 +176,6 @@ struct SelectQueryInfo /// /// Configured in StorageDistributed::getQueryProcessingStage() ClusterPtr optimized_cluster; - /// should we use custom key with the cluster - bool use_custom_key = false; TreeRewriterResultPtr syntax_analyzer_result; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 7ef2ff08827..a829002187b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -429,15 +429,10 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( size_t nodes = getClusterQueriedNodes(settings, cluster); - if (query_info.use_custom_key) - { - LOG_INFO(log, "Single shard cluster used with custom_key, transforming replicas into virtual shards"); - query_info.cluster = cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas); - } - else - { - query_info.cluster = cluster; + query_info.cluster = cluster; + if (!local_context->canUseParallelReplicasCustomKey(*cluster)) + { if (nodes > 1 && settings.optimize_skip_unused_shards) { /// Always calculate optimized cluster here, to avoid conditions during read() @@ -880,30 +875,22 @@ void StorageDistributed::read( storage_snapshot, processed_stage); - auto settings = local_context->getSettingsRef(); + const auto & settings = local_context->getSettingsRef(); ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (query_info.use_custom_key) + if (local_context->canUseParallelReplicasCustomKey(*query_info.getCluster())) { if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *local_context)) { - if (query_info.getCluster()->getShardCount() == 1) - { - // we are reading from single shard with multiple replicas but didn't transform replicas - // into virtual shards with custom_key set - throw Exception(ErrorCodes::LOGICAL_ERROR, "Replicas weren't transformed into virtual shards"); - } - additional_shard_filter_generator = - [&, my_custom_key_ast = std::move(custom_key_ast), shard_count = query_info.cluster->getShardCount()](uint64_t shard_num) -> ASTPtr + [my_custom_key_ast = std::move(custom_key_ast), + column_description = this->getInMemoryMetadataPtr()->columns, + custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, + context = local_context, + replica_count = 
query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr { return getCustomKeyFilterForParallelReplica( - shard_count, - shard_num - 1, - my_custom_key_ast, - settings.parallel_replicas_custom_key_filter_type, - this->getInMemoryMetadataPtr()->columns, - local_context); + replica_count, replica_num - 1, my_custom_key_ast, custom_key_type, column_description, context); }; } } diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h index 239f1bb63ef..7b1d7235bac 100644 --- a/src/Storages/StorageFactory.h +++ b/src/Storages/StorageFactory.h @@ -31,6 +31,7 @@ public: struct Arguments { const String & engine_name; + /// Mutable to allow replacing constant expressions with literals, and other transformations. ASTs & engine_args; ASTStorage * storage_def; const ASTCreateQuery & query; diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index e3c960529de..8914838afed 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -779,6 +779,7 @@ void StorageKeeperMap::backupData(BackupEntriesCollector & backup_entries_collec &Poco::Logger::get(fmt::format("StorageKeeperMapBackup ({})", getStorageID().getNameForLogs())), [&] { return getClient(); }, WithRetries::KeeperSettings::fromContext(backup_entries_collector.getContext()), + backup_entries_collector.getContext()->getProcessListElement(), [](WithRetries::FaultyKeeper &) {} ); @@ -810,6 +811,7 @@ void StorageKeeperMap::restoreDataFromBackup(RestorerFromBackup & restorer, cons &Poco::Logger::get(fmt::format("StorageKeeperMapRestore ({})", getStorageID().getNameForLogs())), [&] { return getClient(); }, WithRetries::KeeperSettings::fromContext(restorer.getContext()), + restorer.getContext()->getProcessListElement(), [](WithRetries::FaultyKeeper &) {} ); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 45057813297..4761ccd8b58 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -215,25 +213,16 @@ void StorageMergeTree::read( { if (local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) { - ASTPtr modified_query_ast; + const auto table_id = getStorageID(); + const auto & modified_query_ast = ClusterProxy::rewriteSelectQuery( + local_context, query_info.query, + table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); + Block header; if (local_context->getSettingsRef().allow_experimental_analyzer) - { - QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone(); - rewriteJoinToGlobalJoin(modified_query_tree); - modified_query_tree = buildQueryTreeForShard(query_info, modified_query_tree); - header = InterpreterSelectQueryAnalyzer::getSampleBlock( - modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); - modified_query_ast = queryNodeToSelectQuery(modified_query_tree); - } + header = InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()); else - { - const auto table_id = getStorageID(); - modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, - table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - header - = 
InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - } + header = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( @@ -244,6 +233,7 @@ void StorageMergeTree::read( ClusterProxy::executeQueryWithParallelReplicas( query_plan, + getStorageID(), select_stream_factory, modified_query_ast, local_context, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 30a08c392d2..f7e6783dbc2 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -704,13 +704,13 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() if (settings->allow_remote_fs_zero_copy_replication) { auto disks = getStoragePolicy()->getDisks(); - std::set disk_types_for_zero_copy; + std::set disk_types_for_zero_copy; for (auto & disk : disks) { if (!disk->supportZeroCopyReplication()) continue; - disk_types_for_zero_copy.insert(disk->getDataSourceDescription().type); + disk_types_for_zero_copy.insert(disk->getDataSourceDescription().toString()); } const auto table_shared_id_ = getTableSharedID(); @@ -733,9 +733,9 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() } } - for (const auto & disk_type: disk_types_for_zero_copy) + for (const auto & disk_type : disk_types_for_zero_copy) { - auto zero_copy = fmt::format("zero_copy_{}", toString(disk_type)); + auto zero_copy = fmt::format("zero_copy_{}", disk_type); auto zero_copy_path = fs::path(settings->remote_fs_zero_copy_zookeeper_path.toString()) / zero_copy; futures.push_back(zookeeper->asyncTryCreateNoThrow(zero_copy_path, String(), zkutil::CreateMode::Persistent)); futures.push_back(zookeeper->asyncTryCreateNoThrow(zero_copy_path / table_shared_id_, String(), zkutil::CreateMode::Persistent)); @@ -5385,9 +5385,7 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( if (local_context->getSettingsRef().allow_experimental_analyzer) { - QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone(); - rewriteJoinToGlobalJoin(modified_query_tree); - modified_query_tree = buildQueryTreeForShard(query_info, modified_query_tree); + auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); header = InterpreterSelectQueryAnalyzer::getSampleBlock( modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); @@ -5410,6 +5408,7 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( ClusterProxy::executeQueryWithParallelReplicas( query_plan, + getStorageID(), select_stream_factory, modified_query_ast, local_context, @@ -9096,7 +9095,7 @@ zkutil::EphemeralNodeHolderPtr StorageReplicatedMergeTree::lockSharedDataTempora String id = part_id; boost::replace_all(id, "/", "_"); - String zc_zookeeper_path = getZeroCopyPartPath(*getSettings(), toString(disk->getDataSourceDescription().type), getTableSharedID(), + String zc_zookeeper_path = getZeroCopyPartPath(*getSettings(), disk->getDataSourceDescription().toString(), getTableSharedID(), part_name, zookeeper_path)[0]; String zookeeper_node = fs::path(zc_zookeeper_path) / id / replica_name; @@ -9608,7 +9607,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::tryToFetchIfShared if (!(disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)) return nullptr; - String replica = 
getSharedDataReplica(part, data_source_description.type); + String replica = getSharedDataReplica(part, data_source_description); /// We can't fetch part when none replicas have this part on a same type remote disk if (replica.empty()) @@ -9618,7 +9617,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::tryToFetchIfShared } String StorageReplicatedMergeTree::getSharedDataReplica( - const IMergeTreeDataPart & part, DataSourceType data_source_type) const + const IMergeTreeDataPart & part, const DataSourceDescription & data_source_description) const { String best_replica; @@ -9626,7 +9625,7 @@ String StorageReplicatedMergeTree::getSharedDataReplica( if (!zookeeper) return ""; - Strings zc_zookeeper_paths = getZeroCopyPartPath(*getSettings(), toString(data_source_type), getTableSharedID(), part.name, + Strings zc_zookeeper_paths = getZeroCopyPartPath(*getSettings(), data_source_description.toString(), getTableSharedID(), part.name, zookeeper_path); std::set replicas; @@ -9783,7 +9782,7 @@ std::optional StorageReplicatedMergeTree::getZeroCopyPartPath(const Stri if (!disk || !disk->supportZeroCopyReplication()) return std::nullopt; - return getZeroCopyPartPath(*getSettings(), toString(disk->getDataSourceDescription().type), getTableSharedID(), part_name, zookeeper_path)[0]; + return getZeroCopyPartPath(*getSettings(), disk->getDataSourceDescription().toString(), getTableSharedID(), part_name, zookeeper_path)[0]; } bool StorageReplicatedMergeTree::waitZeroCopyLockToDisappear(const ZeroCopyLock & lock, size_t milliseconds_to_wait) @@ -10199,7 +10198,7 @@ bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const St std::tie(can_remove, files_not_to_remove) = StorageReplicatedMergeTree::unlockSharedDataByID( id, table_uuid, part_info, detached_replica_name, - toString(disk->getDataSourceDescription().type), + disk->getDataSourceDescription().toString(), std::make_shared(zookeeper), local_context->getReplicatedMergeTreeSettings(), &Poco::Logger::get("StorageReplicatedMergeTree"), detached_zookeeper_path, diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index cb97c133ffc..3c3c2f56fe2 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -316,7 +316,7 @@ public: MutableDataPartPtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; /// Get best replica having this partition on a same type remote disk - String getSharedDataReplica(const IMergeTreeDataPart & part, DataSourceType data_source_type) const; + String getSharedDataReplica(const IMergeTreeDataPart & part, const DataSourceDescription & data_source_description) const; inline const String & getReplicaName() const { return replica_name; } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 3ddbfe8d894..c376af5a3d7 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1488,25 +1488,13 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') /// with optional headers() function - if (engine_args.empty() || engine_args.size() > 6) + size_t count = StorageURL::evalArgsAndCollectHeaders(engine_args, configuration.headers_from_ast, local_context); + + if (count == 0 || count > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage S3 requires 1 to 5 arguments: " "url, [NOSIGN | 
access_key_id, secret_access_key], name of used format and [compression_method]"); - auto * header_it = StorageURL::collectHeaders(engine_args, configuration.headers_from_ast, local_context); - if (header_it != engine_args.end()) - engine_args.erase(header_it); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - /// Size -> argument indexes - static std::unordered_map> size_to_engine_args - { - {1, {{}}}, - {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}} - }; - std::unordered_map engine_args_to_idx; bool no_sign_request = false; @@ -1514,7 +1502,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// - s3(source, format) /// - s3(source, NOSIGN) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (engine_args.size() == 2) + if (count == 2) { auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -1524,10 +1512,10 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context } /// For 3 arguments we support 2 possible variants: /// - s3(source, format, compression_method) - /// - s3(source, access_key_id, access_key_id) + /// - s3(source, access_key_id, secret_access_key) /// - s3(source, NOSIGN, format) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. - else if (engine_args.size() == 3) + else if (count == 3) { auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/access_key_id/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -1545,7 +1533,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// - s3(source, access_key_id, secret_access_key, format) /// - s3(source, NOSIGN, format, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
- else if (engine_args.size() == 4) + else if (count == 4) { auto second_arg = checkAndGetLiteralArgument(engine_args[1], "access_key_id/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -1569,7 +1557,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// For 5 arguments we support 2 possible variants: /// - s3(source, access_key_id, secret_access_key, session_token, format) /// - s3(source, access_key_id, secret_access_key, format, compression) - else if (engine_args.size() == 5) + else if (count == 5) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) @@ -1581,9 +1569,9 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; } } - else + else if (count == 6) { - engine_args_to_idx = size_to_engine_args[engine_args.size()]; + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; } /// This argument is always the first diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 654b786326b..0ba72af6fc0 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1324,7 +1324,7 @@ FormatSettings StorageURL::getFormatSettingsFromArgs(const StorageFactory::Argum return format_settings; } -ASTs::iterator StorageURL::collectHeaders( +size_t StorageURL::evalArgsAndCollectHeaders( ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context) { ASTs::iterator headers_it = url_function_args.end(); @@ -1382,7 +1382,11 @@ ASTs::iterator StorageURL::collectHeaders( (*arg_it) = evaluateConstantExpressionOrIdentifierAsLiteral((*arg_it), context); } - return headers_it; + if (headers_it == url_function_args.end()) + return url_function_args.size(); + + std::rotate(headers_it, std::next(headers_it), url_function_args.end()); + return url_function_args.size() - 1; } void StorageURL::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection) @@ -1412,21 +1416,19 @@ StorageURL::Configuration StorageURL::getConfiguration(ASTs & args, ContextPtr l if (auto named_collection = tryGetNamedCollectionWithOverrides(args, local_context)) { StorageURL::processNamedCollectionResult(configuration, *named_collection); - collectHeaders(args, configuration.headers, local_context); + evalArgsAndCollectHeaders(args, configuration.headers, local_context); } else { - if (args.empty() || args.size() > 3) + size_t count = evalArgsAndCollectHeaders(args, configuration.headers, local_context); + + if (count == 0 || count > 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, bad_arguments_error_message); - auto * header_it = collectHeaders(args, configuration.headers, local_context); - if (header_it != args.end()) - args.erase(header_it); - configuration.url = checkAndGetLiteralArgument(args[0], "url"); - if (args.size() > 1) + if (count > 1) configuration.format = checkAndGetLiteralArgument(args[1], "format"); - if (args.size() == 3) + if (count == 3) configuration.compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 07d4d0cad38..c8b8d0942f4 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -294,7 +294,10 @@ public: static 
Configuration getConfiguration(ASTs & args, ContextPtr context); - static ASTs::iterator collectHeaders(ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context); + /// Does evaluateConstantExpressionOrIdentifierAsLiteral() on all arguments. + /// If `headers(...)` argument is present, parses it and moves it to the end of the array. + /// Returns number of arguments excluding `headers(...)`. + static size_t evalArgsAndCollectHeaders(ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context); static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); }; diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 9abbab9ff91..30d64156b22 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -69,7 +69,7 @@ Pipe StorageSystemDisks::read( col_unreserved->insert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits::max())); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = disk_ptr->getDataSourceDescription(); - col_type->insert(toString(data_source_description.type)); + col_type->insert(data_source_description.toString()); col_is_encrypted->insert(data_source_description.is_encrypted); col_is_read_only->insert(disk_ptr->isReadOnly()); col_is_write_once->insert(disk_ptr->isWriteOnce()); diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index 6a9502d0bd8..ba07d44dbf9 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -4,15 +4,15 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include -#include "IO/ResourceRequest.h" +#include "Common/Scheduler/ResourceRequest.h" namespace DB diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 5ea28d9e09c..00cc5e3ee58 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -373,37 +373,11 @@ QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeN removeGroupingFunctionSpecializations(query_tree_to_modify); + // std::cerr << "====================== build 1 \n" << query_tree_to_modify->dumpTree() << std::endl; createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext()); + // std::cerr << "====================== build 2 \n" << query_tree_to_modify->dumpTree() << std::endl; return query_tree_to_modify; } -class RewriteJoinToGlobalJoinVisitor : public InDepthQueryTreeVisitor -{ -public: - using Base = InDepthQueryTreeVisitor; - using Base::Base; - - void visitImpl(QueryTreeNodePtr & node) - { - if (auto * join_node = node->as()) - join_node->setLocality(JoinLocality::Global); - } - - static bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child) - { - auto * join_node = parent->as(); - if (join_node && join_node->getRightTableExpression() == child) - return false; - - return true; - } -}; - -void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify) -{ - RewriteJoinToGlobalJoinVisitor visitor; - visitor.visit(query_tree_to_modify); -} - } diff --git a/src/Storages/buildQueryTreeForShard.h b/src/Storages/buildQueryTreeForShard.h index eec5a0dc38a..05d63faeb9f 100644 --- a/src/Storages/buildQueryTreeForShard.h +++ 
b/src/Storages/buildQueryTreeForShard.h @@ -12,6 +12,4 @@ using QueryTreeNodePtr = std::shared_ptr; QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeNodePtr query_tree_to_modify); -void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify); - } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index c52256fb984..a9c5a5c99f0 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -67,23 +67,11 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else { - auto * header_it = StorageURL::collectHeaders(args, configuration.headers_from_ast, context); - if (header_it != args.end()) - args.erase(header_it); + size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers_from_ast, context); - if (args.empty() || args.size() > 7) + if (count == 0 || count > 7) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); - for (auto & arg : args) - arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - - /// Size -> argument indexes - static std::unordered_map> size_to_args - { - {1, {{}}}, - {7, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}} - }; - std::unordered_map args_to_idx; bool no_sign_request = false; @@ -92,7 +80,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context /// - s3(source, format) /// - s3(source, NOSIGN) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (args.size() == 2) + if (count == 2) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -102,10 +90,10 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } /// For 3 arguments we support 3 possible variants: /// - s3(source, format, structure) - /// - s3(source, access_key_id, access_key_id) + /// - s3(source, access_key_id, secret_access_key) /// - s3(source, NOSIGN, format) /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. - else if (args.size() == 3) + else if (count == 3) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -120,11 +108,11 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } /// For 4 arguments we support 4 possible variants: /// - s3(source, format, structure, compression_method), - /// - s3(source, access_key_id, access_key_id, format), - /// - s3(source, access_key_id, access_key_id, session_token) + /// - s3(source, access_key_id, secret_access_key, format), + /// - s3(source, access_key_id, secret_access_key, session_token) /// - s3(source, NOSIGN, format, structure) /// We can distinguish them by looking at the 2-nd and 4-th argument: check if it's a format name or not. 
- else if (args.size() == 4) + else if (count == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -150,12 +138,12 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } } /// For 5 arguments we support 3 possible variants: - /// - s3(source, access_key_id, access_key_id, format, structure) - /// - s3(source, access_key_id, access_key_id, session_token, format) + /// - s3(source, access_key_id, secret_access_key, format, structure) + /// - s3(source, access_key_id, secret_access_key, session_token, format) /// - s3(source, NOSIGN, format, structure, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or no, /// and by the 4-th argument, check if it's a format name or not - else if (args.size() == 5) + else if (count == 5) { auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); if (boost::iequals(second_arg, "NOSIGN")) @@ -177,10 +165,10 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } } // For 6 arguments we support 2 possible variants: - /// - s3(source, access_key_id, access_key_id, format, structure, compression_method) - /// - s3(source, access_key_id, access_key_id, session_token, format, structure) + /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) + /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not - else if (args.size() == 6) + else if (count == 6) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) @@ -192,9 +180,9 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; } } - else + else if (count == 7) { - args_to_idx = size_to_args[args.size()]; + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; } /// This argument is always the first @@ -262,24 +250,16 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } else { - /// If arguments contain headers, just remove it and add to the end of arguments later - /// (header argument can be at any position). HTTPHeaderEntries tmp_headers; - auto * headers_it = StorageURL::collectHeaders(args, tmp_headers, context); - ASTPtr headers_ast; - if (headers_it != args.end()) - { - headers_ast = *headers_it; - args.erase(headers_it); - } + size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); - if (args.empty() || args.size() > getMaxNumberOfArguments()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), args.size()); + if (count == 0 || count > getMaxNumberOfArguments()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), count); auto structure_literal = std::make_shared(structure); /// s3(s3_url) - if (args.size() == 1) + if (count == 1) { /// Add format=auto before structure argument. 
args.push_back(std::make_shared("auto")); @@ -287,7 +267,7 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } /// s3(s3_url, format) or s3(s3_url, NOSIGN) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - else if (args.size() == 2) + else if (count == 2) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); /// If there is NOSIGN, add format=auto before structure. @@ -296,10 +276,10 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & args.push_back(structure_literal); } /// s3(source, format, structure) or - /// s3(source, access_key_id, access_key_id) or + /// s3(source, access_key_id, secret_access_key) or /// s3(source, NOSIGN, format) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. - else if (args.size() == 3) + else if (count == 3) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -308,7 +288,7 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) { - args.back() = structure_literal; + args[count - 1] = structure_literal; } else { @@ -318,48 +298,45 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } } /// s3(source, format, structure, compression_method) or - /// s3(source, access_key_id, access_key_id, format) or + /// s3(source, access_key_id, secret_access_key, format) or /// s3(source, NOSIGN, format, structure) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. - else if (args.size() == 4) + else if (count == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) { - args.back() = structure_literal; + args[count - 1] = structure_literal; } else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) { - args[args.size() - 2] = structure_literal; + args[count - 2] = structure_literal; } else { args.push_back(structure_literal); } } - /// s3(source, access_key_id, access_key_id, format, structure) or + /// s3(source, access_key_id, secret_access_key, format, structure) or /// s3(source, NOSIGN, format, structure, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. 
- else if (args.size() == 5) + else if (count == 5) { auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); if (boost::iequals(sedond_arg, "NOSIGN")) { - args[args.size() - 2] = structure_literal; + args[count - 2] = structure_literal; } else { - args.back() = structure_literal; + args[count - 1] = structure_literal; } } - /// s3(source, access_key_id, access_key_id, format, structure, compression) - else if (args.size() == 6) + /// s3(source, access_key_id, secret_access_key, format, structure, compression) + else if (count == 6) { - args[args.size() - 2] = structure_literal; + args[count - 2] = structure_literal; } - - if (headers_ast) - args.push_back(headers_ast); } } diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 254cdba64d5..aa535991d65 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -57,16 +57,24 @@ void TableFunctionURL::parseArgumentsImpl(ASTs & args, const ContextPtr & contex if (format == "auto") format = FormatFactory::instance().getFormatFromFileName(Poco::URI(filename).getPath(), true); - StorageURL::collectHeaders(args, configuration.headers, context); + StorageURL::evalArgsAndCollectHeaders(args, configuration.headers, context); } else { - auto * headers_it = StorageURL::collectHeaders(args, configuration.headers, context); + size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers, context); /// ITableFunctionFileLike cannot parse headers argument, so remove it. - if (headers_it != args.end()) - args.erase(headers_it); + ASTPtr headers_ast; + if (count != args.size()) + { + chassert(count + 1 == args.size()); + headers_ast = args.back(); + args.pop_back(); + } ITableFunctionFileLike::parseArgumentsImpl(args, context); + + if (headers_ast) + args.push_back(headers_ast); } } @@ -82,15 +90,15 @@ void TableFunctionURL::addColumnsStructureToArguments(ASTs & args, const String } else { - /// If arguments contain headers, just remove it and add to the end of arguments later - /// (header argument can be at any position). + /// If arguments contain headers, just remove it and add to the end of arguments later. 
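An aside on the helper used throughout these hunks (editor's illustration, not part of the patch): per its documentation in StorageURL.h, evalArgsAndCollectHeaders() evaluates the arguments, moves any headers(...) element to the end of the array with std::rotate, and returns the number of remaining arguments. A minimal sketch of that contract, with plain strings standing in for ASTs and a starts_with check standing in for the real headers(...) detection (C++20 for std::string::starts_with):

#include <algorithm>
#include <cassert>
#include <iostream>
#include <string>
#include <vector>

// Move the (at most one) "headers(...)" argument to the back and return how many
// "real" arguments precede it; mirrors the rotate-and-count contract described above.
static size_t moveHeadersToEnd(std::vector<std::string> & args)
{
    const auto headers_it = std::find_if(args.begin(), args.end(),
        [](const std::string & arg) { return arg.starts_with("headers("); });

    if (headers_it == args.end())
        return args.size();                       // no headers(...) argument at all

    std::rotate(headers_it, std::next(headers_it), args.end());
    return args.size() - 1;                       // everything except the trailing headers(...)
}

int main()
{
    std::vector<std::string> args = {"'http://example.com/data.csv'", "headers('X-Token' = 'abc')", "'CSV'"};
    const size_t count = moveHeadersToEnd(args);

    assert(count == 2 && args.back().starts_with("headers("));
    std::cout << "url = " << args[0] << ", format = " << args[1] << ", count = " << count << "\n";
    return 0;
}

Callers can then index args[0..count) directly and, as the code that follows this comment does, detect a trailing headers(...) via count != args.size() and temporarily pop_back() it.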
HTTPHeaderEntries tmp_headers; - auto * headers_it = StorageURL::collectHeaders(args, tmp_headers, context); + size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); ASTPtr headers_ast; - if (headers_it != args.end()) + if (count != args.size()) { - headers_ast = *headers_it; - args.erase(headers_it); + chassert(count + 1 == args.size()); + headers_ast = args.back(); + args.pop_back(); } ITableFunctionFileLike::addColumnsStructureToArguments(args, desired_structure, context); diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 68004eec2bb..41e4ef19361 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -6,29 +6,16 @@ import subprocess import sys from pathlib import Path -from github import Github - from build_download_helper import get_build_name_for_check, read_build_urls from clickhouse_helper import ( CiLogsCredentials, - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - format_description, - get_commit, - post_commit_status, ) from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import REPORT_PATH, TEMP_PATH -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResult -from s3_helper import S3Helper +from report import JobReport from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/fuzzer" @@ -77,14 +64,6 @@ def main(): pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) build_name = get_build_name_for_check(check_name) @@ -131,10 +110,6 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path) - check_name_lower = ( - check_name.lower().replace("(", "").replace(")", "").replace(" ", "") - ) - s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/" paths = { "run.log": run_log_path, "main.log": main_log_path, @@ -154,17 +129,6 @@ def main(): if not_compressed_server_log_path.exists(): paths["server.log"] = not_compressed_server_log_path - s3_helper = S3Helper() - urls = [] - report_url = "" - for file, path in paths.items(): - try: - url = s3_helper.upload_test_report_to_s3(path, s3_prefix + file) - report_url = url if file == "report.html" else report_url - urls.append(url) - except Exception as ex: - logging.info("Exception uploading file %s text %s", file, ex) - # Try to get status message saved by the fuzzer try: with open(workspace_path / "status.txt", "r", encoding="utf-8") as status_f: @@ -176,42 +140,19 @@ def main(): status = "failure" description = "Task failed: $?=" + str(retcode) - description = format_description(description) + JobReport( + description=description, + test_results=[], + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + # test generates its own report.html + additional_files=[v for _, v in paths.items()], + ).dump() - test_result = TestResult(description, "OK") - if "fail" in status: - test_result.status = "FAIL" - - if not report_url: - report_url = upload_results( - s3_helper, - 
pr_info.number, - pr_info.sha, - [test_result], - [], - check_name, - urls, - ) - - ch_helper = ClickHouseHelper() - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - [test_result], - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - - logging.info("Result: '%s', '%s', '%s'", status, description, report_url) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) + logging.info("Result: '%s', '%s'", status, description) + if status == "failure": + sys.exit(1) if __name__ == "__main__": diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 27243aac4f1..cec8c4c7b65 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -12,15 +12,13 @@ from ci_config import CI_CONFIG, BuildConfig from cache_utils import CargoCache from env_helper import ( - GITHUB_JOB_API_URL, REPO_COPY, S3_BUILDS_BUCKET, - S3_DOWNLOAD, TEMP_PATH, ) -from git_helper import Git, git_runner +from git_helper import Git from pr_info import PRInfo -from report import BuildResult, FAILURE, StatusType, SUCCESS +from report import FAILURE, JobReport, StatusType, SUCCESS from s3_helper import S3Helper from tee_popen import TeePopen import docker_images_helper @@ -29,13 +27,6 @@ from version_helper import ( get_version_from_repo, update_version_local, ) -from clickhouse_helper import ( - ClickHouseHelper, - CiLogsCredentials, - prepare_tests_results_for_clickhouse, - get_instance_type, - get_instance_id, -) from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/binary-builder" @@ -122,61 +113,6 @@ def build_clickhouse( return build_log_path, SUCCESS if success else FAILURE -def check_for_success_run( - s3_helper: S3Helper, - s3_prefix: str, - build_name: str, - version: ClickHouseVersion, -) -> None: - # TODO: Remove after S3 artifacts - logging.info("Checking for artifacts %s in bucket %s", s3_prefix, S3_BUILDS_BUCKET) - try: - # Performance artifacts are now part of regular build, so we're safe - build_results = s3_helper.list_prefix(s3_prefix) - except Exception as ex: - logging.info("Got exception while listing %s: %s\nRerun", s3_prefix, ex) - return - - if build_results is None or len(build_results) == 0: - logging.info("Nothing found in %s, rerun", s3_prefix) - return - - logging.info("Some build results found:\n%s", build_results) - build_urls = [] - log_url = "" - for url in build_results: - url_escaped = url.replace("+", "%2B").replace(" ", "%20") - if BUILD_LOG_NAME in url: - log_url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{url_escaped}" - else: - build_urls.append(f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{url_escaped}") - if not log_url: - # log is uploaded the last, so if there's no log we need to rerun the build - return - - success = len(build_urls) > 0 - build_result = BuildResult( - build_name, - log_url, - build_urls, - version.describe, - SUCCESS if success else FAILURE, - 0, - GITHUB_JOB_API_URL(), - ) - result_json_path = build_result.write_json(Path(TEMP_PATH)) - logging.info( - "Build result file %s is written, content:\n %s", - result_json_path, - result_json_path.read_text(encoding="utf-8"), - ) - # Fail build job if not successeded - if not success: - sys.exit(1) - else: - sys.exit(0) - - def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, str]: "Return prefixes for S3 artifacts paths" # FIXME performance 
@@ -196,34 +132,6 @@ def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, return pr_number, pr_number -def upload_master_static_binaries( - pr_info: PRInfo, - build_config: BuildConfig, - s3_helper: S3Helper, - build_output_path: Path, -) -> None: - """Upload binary artifacts to a static S3 links""" - static_binary_name = build_config.static_binary_name - if pr_info.number != 0: - return - elif not static_binary_name: - return - elif pr_info.base_ref != "master": - return - - # Full binary with debug info: - s3_path_full = "/".join((pr_info.base_ref, static_binary_name, "clickhouse-full")) - binary_full = build_output_path / "clickhouse" - url_full = s3_helper.upload_build_file_to_s3(binary_full, s3_path_full) - print(f"::notice ::Binary static URL (with debug info): {url_full}") - - # Stripped binary without debug info: - s3_path_compact = "/".join((pr_info.base_ref, static_binary_name, "clickhouse")) - binary_compact = build_output_path / "clickhouse-stripped" - url_compact = s3_helper.upload_build_file_to_s3(binary_compact, s3_path_compact) - print(f"::notice ::Binary static URL (compact): {url_compact}") - - def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser("Clickhouse builder script") parser.add_argument( @@ -254,21 +162,6 @@ def main(): s3_helper = S3Helper() version = get_version_from_repo(git=Git(True)) - release_or_pr, performance_pr = get_release_or_pr(pr_info, version) - - s3_path_prefix = "/".join((release_or_pr, pr_info.sha, build_name)) - # FIXME performance - s3_performance_path = "/".join( - (performance_pr, pr_info.sha, build_name, "performance.tar.zst") - ) - - # FIXME: to be removed in favor of "skip by job digest" - # If this is rerun, then we try to find already created artifacts and just - # put them as github actions artifact (result) - # The s3_path_prefix has additional "/" in the end to prevent finding - # e.g. 
`binary_darwin_aarch64/clickhouse` for `binary_darwin` - check_for_success_run(s3_helper, f"{s3_path_prefix}/", build_name, version) - logging.info("Got version from repo %s", version.string) official_flag = pr_info.number == 0 @@ -331,174 +224,16 @@ def main(): ) sys.exit(1) - # FIXME performance - performance_urls = [] - performance_path = build_output_path / "performance.tar.zst" - if performance_path.exists(): - performance_urls.append( - s3_helper.upload_build_file_to_s3(performance_path, s3_performance_path) - ) - logging.info( - "Uploaded performance.tar.zst to %s, now delete to avoid duplication", - performance_urls[0], - ) - performance_path.unlink() - - build_urls = ( - s3_helper.upload_build_directory_to_s3( - build_output_path, - s3_path_prefix, - keep_dirs_in_s3_path=False, - upload_symlinks=False, - ) - + performance_urls - ) - logging.info("Got build URLs %s", build_urls) - - print("::notice ::Build URLs: {}".format("\n".join(build_urls))) - - if log_path.exists(): - log_url = s3_helper.upload_build_file_to_s3( - log_path, s3_path_prefix + "/" + log_path.name - ) - logging.info("Log url %s", log_url) - else: - logging.info("Build log doesn't exist") - - print(f"::notice ::Log URL: {log_url}") - - build_result = BuildResult( - build_name, - log_url, - build_urls, - version.describe, - build_status, - elapsed, - GITHUB_JOB_API_URL(), - ) - result_json_path = build_result.write_json(temp_path) - logging.info( - "Build result file %s is written, content:\n %s", - result_json_path, - result_json_path.read_text(encoding="utf-8"), - ) - - upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path) - - # Upload profile data - ch_helper = ClickHouseHelper() - - ci_logs_credentials = CiLogsCredentials(Path("/dev/null")) - if ci_logs_credentials.host: - instance_type = get_instance_type() - instance_id = get_instance_id() - query = f"""INSERT INTO build_time_trace -( - pull_request_number, - commit_sha, - check_start_time, - check_name, - instance_type, - instance_id, - file, - library, - time, - pid, - tid, - ph, - ts, - dur, - cat, - name, - detail, - count, - avgMs, - args_name -) -SELECT {pr_info.number}, '{pr_info.sha}', '{stopwatch.start_time_str}', '{build_name}', '{instance_type}', '{instance_id}', * -FROM input(' - file String, - library String, - time DateTime64(6), - pid UInt32, - tid UInt32, - ph String, - ts UInt64, - dur UInt64, - cat String, - name String, - detail String, - count UInt64, - avgMs UInt64, - args_name String') -FORMAT JSONCompactEachRow""" - - auth = { - "X-ClickHouse-User": "ci", - "X-ClickHouse-Key": ci_logs_credentials.password, - } - url = f"https://{ci_logs_credentials.host}/" - profiles_dir = temp_path / "profiles_source" - profiles_dir.mkdir(parents=True, exist_ok=True) - logging.info( - "Processing profile JSON files from %s", repo_path / "build_docker" - ) - git_runner( - "./utils/prepare-time-trace/prepare-time-trace.sh " - f"build_docker {profiles_dir.absolute()}" - ) - profile_data_file = temp_path / "profile.json" - with open(profile_data_file, "wb") as profile_fd: - for profile_source in profiles_dir.iterdir(): - if profile_source.name != "binary_sizes.txt": - with open(profiles_dir / profile_source, "rb") as ps_fd: - profile_fd.write(ps_fd.read()) - - logging.info( - "::notice ::Log Uploading profile data, path: %s, size: %s, query: %s", - profile_data_file, - profile_data_file.stat().st_size, - query, - ) - ch_helper.insert_file(url, auth, query, profile_data_file) - - query = f"""INSERT INTO binary_sizes -( - 
pull_request_number, - commit_sha, - check_start_time, - check_name, - instance_type, - instance_id, - file, - size -) -SELECT {pr_info.number}, '{pr_info.sha}', '{stopwatch.start_time_str}', '{build_name}', '{instance_type}', '{instance_id}', file, size -FROM input('size UInt64, file String') -SETTINGS format_regexp = '^\\s*(\\d+) (.+)$' -FORMAT Regexp""" - - binary_sizes_file = profiles_dir / "binary_sizes.txt" - - logging.info( - "::notice ::Log Uploading binary sizes data, path: %s, size: %s, query: %s", - binary_sizes_file, - binary_sizes_file.stat().st_size, - query, - ) - ch_helper.insert_file(url, auth, query, binary_sizes_file) - - # Upload statistics to CI database - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - [], - build_status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - log_url, - f"Build ({build_name})", - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=version.describe, + test_results=[], + status=build_status, + start_time=stopwatch.start_time_str, + duration=elapsed, + additional_files=[log_path], + build_dir_for_upload=build_output_path, + version=version.describe, + ).dump() # Fail the build job if it didn't succeed if build_status != SUCCESS: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index d9925725eec..8f8f2b28935 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -4,12 +4,9 @@ import json import logging import os import sys -import atexit from pathlib import Path from typing import List -from github import Github - from env_helper import ( GITHUB_JOB_URL, GITHUB_REPOSITORY, @@ -22,20 +19,14 @@ from report import ( ERROR, PENDING, SUCCESS, + JobReport, create_build_html_report, get_worst_status, ) -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token + from pr_info import PRInfo -from commit_status_helper import ( - RerunHelper, - format_description, - get_commit, - post_commit_status, - update_mergeable_check, -) from ci_config import CI_CONFIG +from stopwatch import Stopwatch # Old way to read the neads_data @@ -46,6 +37,7 @@ NEEDS_DATA = os.getenv("NEEDS_DATA", "") def main(): logging.basicConfig(level=logging.INFO) + stopwatch = Stopwatch() temp_path = Path(TEMP_PATH) reports_path = Path(REPORT_PATH) temp_path.mkdir(parents=True, exist_ok=True) @@ -74,16 +66,7 @@ def main(): if needs_data: logging.info("The next builds are required: %s", ", ".join(needs_data)) - gh = Github(get_best_robot_token(), per_page=100) pr_info = PRInfo() - commit = get_commit(gh, pr_info.sha) - - atexit.register(update_mergeable_check, commit, pr_info, build_check_name) - - rerun_helper = RerunHelper(commit, build_check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) builds_for_check = CI_CONFIG.get_builds_for_report(build_check_name) required_builds = required_builds or len(builds_for_check) @@ -91,10 +74,15 @@ def main(): # Collect reports from json artifacts build_results = [] for build_name in builds_for_check: - build_result = BuildResult.read_json(reports_path, build_name) - if build_result.is_missing: + build_result = BuildResult.load_any( + build_name, pr_info.number, pr_info.head_ref + ) + if not build_result: logging.warning("Build results for %s are missing", build_name) continue + assert ( + pr_info.head_ref == build_result.head_ref or pr_info.number > 0 + ), "BUG. 
if not a PR, report must be created on the same branch" build_results.append(build_result) # The code to collect missing reports for failed jobs @@ -125,8 +113,6 @@ def main(): logging.error("No success builds, failing check without creating a status") sys.exit(1) - s3_helper = S3Helper() - branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" branch_name = "master" if pr_info.number != 0: @@ -146,18 +132,6 @@ def main(): report_path = temp_path / "report.html" report_path.write_text(report, encoding="utf-8") - logging.info("Going to upload prepared report") - context_name_for_path = build_check_name.lower().replace(" ", "_") - s3_path_prefix = ( - str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path - ) - - url = s3_helper.upload_test_report_to_s3( - report_path, s3_path_prefix + "/report.html" - ) - logging.info("Report url %s", url) - print(f"::notice ::Report url: {url}") - # Prepare a commit status summary_status = get_worst_status(br.status for br in build_results) @@ -174,19 +148,16 @@ def main(): f" ({required_builds - missing_builds} of {required_builds} builds are OK)" ) - description = format_description( - f"{ok_groups}/{total_groups} artifact groups are OK{addition}" - ) + description = f"{ok_groups}/{total_groups} artifact groups are OK{addition}" - post_commit_status( - commit, - summary_status, - url, - description, - build_check_name, - pr_info, - dump_to_file=True, - ) + JobReport( + description=description, + test_results=[], + status=summary_status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[report_path], + ).dump() if summary_status == ERROR: sys.exit(1) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 701f66b9a6a..067aa6173fc 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1,6 +1,7 @@ import argparse import concurrent.futures import json +import logging import os import re import subprocess @@ -9,22 +10,41 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Optional import docker_images_helper -from ci_config import CI_CONFIG +from ci_config import CI_CONFIG, Labels from commit_status_helper import ( CommitStatusData, + RerunHelper, format_description, get_commit, + post_commit_status, set_status_comment, + update_mergeable_check, ) from digest_helper import DockerDigester, JobDigester -from env_helper import CI, REPORT_PATH, ROOT_DIR, S3_BUILDS_BUCKET, TEMP_PATH +from env_helper import ( + CI, + GITHUB_JOB_API_URL, + REPO_COPY, + REPORT_PATH, + S3_BUILDS_BUCKET, + TEMP_PATH, +) from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, Git from git_helper import Runner as GitRunner from github import Github from pr_info import PRInfo -from report import BuildResult +from report import SUCCESS, BuildResult, JobReport from s3_helper import S3Helper +from clickhouse_helper import ( + CiLogsCredentials, + ClickHouseHelper, + get_instance_id, + get_instance_type, + prepare_tests_results_for_clickhouse, +) +from build_check import get_release_or_pr +import upload_result_helper from version_helper import get_version_from_repo @@ -42,20 +62,6 @@ def normalize_check_name(check_name: str) -> str: return res -def is_build_job(job: str) -> bool: - if "package_" in job or "binary_" in job or job == "fuzzers": - return True - return False - - -def is_test_job(job: str) -> bool: - return not is_build_job(job) and not "Style" in job and not "Docs check" in job - - -def is_docs_job(job: str) -> bool: - return "Docs check" in job - - def 
parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: # FIXME: consider switching to sub_parser for configure, pre, run, post actions parser.add_argument( @@ -94,6 +100,12 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: type=str, help="Job name as in config", ) + parser.add_argument( + "--run-command", + default="", + type=str, + help="A run command to run in --run action. Will override run_command from a job config if any", + ) parser.add_argument( "--batch", default=-1, @@ -149,6 +161,11 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: default=False, help="will create run config without skipping build jobs in any case, used in --configure action (for release branches)", ) + parser.add_argument( + "--commit-message", + default="", + help="debug option to test commit message processing", + ) return parser.parse_args() @@ -271,6 +288,7 @@ def _update_config_for_docs_only(run_config: dict) -> None: def _configure_docker_jobs( rebuild_all_dockers: bool, docker_digest_or_latest: bool = False ) -> Dict: + print("::group::Docker images check") # generate docker jobs data docker_digester = DockerDigester() imagename_digest_dict = ( @@ -283,7 +301,6 @@ def _configure_docker_jobs( # FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] # find if it's possible to use the setting of /etc/docker/daemon.json docker_images_helper.docker_login() - print("Start checking missing images in dockerhub") missing_multi_dict = check_missing_images_on_dockerhub(imagename_digest_dict) missing_multi = list(missing_multi_dict) missing_amd64 = [] @@ -313,7 +330,6 @@ def _configure_docker_jobs( ) for image in missing_multi: imagename_digest_dict[image] = "latest" - print("...checking missing images in dockerhub - done") else: # add all images to missing missing_multi = list(imagename_digest_dict) @@ -324,6 +340,7 @@ def _configure_docker_jobs( for name in imagename_digest_dict if not images_info[name]["only_amd64"] ] + print("::endgroup::") return { "images": imagename_digest_dict, @@ -341,30 +358,36 @@ def _configure_jobs( rebuild_all_binaries: bool, pr_labels: Iterable[str], commit_tokens: List[str], + ci_cache_enabled: bool, ) -> Dict: - # a. digest each item from the config + ## a. digest each item from the config job_digester = JobDigester() jobs_params: Dict[str, Dict] = {} jobs_to_do: List[str] = [] jobs_to_skip: List[str] = [] digests: Dict[str, str] = {} - print("Calculating job digests - start") + print("::group::Job Digests") + for job in CI_CONFIG.job_generator(): digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job)) digests[job] = digest print(f" job [{job.rjust(50)}] has digest [{digest}]") - print("Calculating job digests - done") + print("::endgroup::") + + ## b. check if we have something done + if ci_cache_enabled: + done_files = [] + else: + path = get_s3_path(build_digest) + done_files = s3.list_prefix(path) + done_files = [file.split("/")[-1] for file in done_files] + # print(f"S3 CI files for the build [{build_digest}]: {done_files}") + docs_path = get_s3_path_docs(docs_digest) + done_files_docs = s3.list_prefix(docs_path) + done_files_docs = [file.split("/")[-1] for file in done_files_docs] + # print(f"S3 CI files for the docs [{docs_digest}]: {done_files_docs}") + done_files += done_files_docs - # b. 
check if we have something done - path = get_s3_path(build_digest) - done_files = s3.list_prefix(path) - done_files = [file.split("/")[-1] for file in done_files] - print(f"S3 CI files for the build [{build_digest}]: {done_files}") - docs_path = get_s3_path_docs(docs_digest) - done_files_docs = s3.list_prefix(docs_path) - done_files_docs = [file.split("/")[-1] for file in done_files_docs] - print(f"S3 CI files for the docs [{docs_digest}]: {done_files_docs}") - done_files += done_files_docs for job in digests: digest = digests[job] job_config = CI_CONFIG.get_job_config(job) @@ -384,7 +407,7 @@ def _configure_jobs( for batch in range(num_batches): # type: ignore success_flag_name = get_file_flag_name(job, digest, batch, num_batches) if success_flag_name not in done_files or ( - rebuild_all_binaries and is_build_job(job) + rebuild_all_binaries and CI_CONFIG.is_build_job(job) ): batches_to_do.append(batch) @@ -395,8 +418,9 @@ def _configure_jobs( "num_batches": num_batches, } else: - jobs_to_skip += (job,) + jobs_to_skip.append(job) + ## c. check CI controlling labels commit messages if pr_labels: jobs_requested_by_label = [] # type: List[str] ci_controlling_labels = [] # type: List[str] @@ -410,41 +434,65 @@ def _configure_jobs( print( f" : following jobs will be executed: [{jobs_requested_by_label}]" ) - jobs_to_do = jobs_requested_by_label + jobs_to_do = [job for job in jobs_requested_by_label if job in jobs_to_do] if commit_tokens: + jobs_to_do_requested = [] # type: List[str] + + # handle ci set tokens + ci_controlling_tokens = [ + token for token in commit_tokens if token in CI_CONFIG.label_configs + ] + for token_ in ci_controlling_tokens: + label_config = CI_CONFIG.get_label_config(token_) + assert label_config, f"Unknonwn token [{token_}]" + print( + f"NOTE: CI controlling token: [{ci_controlling_tokens}], add jobs: [{label_config.run_jobs}]" + ) + jobs_to_do_requested += label_config.run_jobs + + # handle specific job requests requested_jobs = [ - token[len("#job_") :] - for token in commit_tokens - if token.startswith("#job_") + token[len("job_") :] for token in commit_tokens if token.startswith("job_") ] if requested_jobs: assert any( len(x) > 1 for x in requested_jobs ), f"Invalid job names requested [{requested_jobs}]" - jobs_to_do_requested = [] for job in requested_jobs: job_with_parents = CI_CONFIG.get_job_with_parents(job) + print( + f"NOTE: CI controlling token: [#job_{job}], add jobs: [{job_with_parents}]" + ) # always add requested job itself, even if it could be skipped jobs_to_do_requested.append(job_with_parents[0]) for parent in job_with_parents[1:]: if parent in jobs_to_do and parent not in jobs_to_do_requested: jobs_to_do_requested.append(parent) + + if jobs_to_do_requested: print( f"NOTE: Only specific job(s) were requested by commit message tokens: [{jobs_to_do_requested}]" ) - jobs_to_do = jobs_to_do_requested + jobs_to_do = list( + set(job for job in jobs_to_do_requested if job in jobs_to_do) + ) return { "digests": digests, "jobs_to_do": jobs_to_do, "jobs_to_skip": jobs_to_skip, - "jobs_params": jobs_params, + "jobs_params": { + job: params for job, params in jobs_params.items() if job in jobs_to_do + }, } def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: - # This action is required to re-create all GH statuses for skipped jobs, so that ci report can be generated afterwards + if indata["ci_flags"][Labels.NO_CI_CACHE]: + print("CI cache is disabled - skip restoring commit statuses from CI cache") + return + temp_path = Path(TEMP_PATH) if not 
temp_path.exists(): temp_path.mkdir(parents=True, exist_ok=True) @@ -485,7 +533,7 @@ def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: job_status = CommitStatusData.load_from_file( f"{TEMP_PATH}/{success_flag_name}" ) # type: CommitStatusData - assert job_status.status == "success", "BUG!" + assert job_status.status == SUCCESS, "BUG!" commit.create_status( state=job_status.status, target_url=job_status.report_url, @@ -500,7 +548,7 @@ def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: with concurrent.futures.ThreadPoolExecutor() as executor: futures = [] for job in job_digests: - if is_build_job(job): + if CI_CONFIG.is_build_job(job): # no GH status for build jobs continue digest = job_digests[job] @@ -528,25 +576,263 @@ def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: def _fetch_commit_tokens(message: str) -> List[str]: pattern = r"#[\w-]+" - matches = re.findall(pattern, message) - res = [ - match - for match in matches - if match == "#no-merge-commit" - or match.startswith("#job_") - or match.startswith("#job-") - ] + matches = [match[1:] for match in re.findall(pattern, message)] + res = [match for match in matches if match in Labels or match.startswith("job_")] return res +def _upload_build_artifacts( + pr_info: PRInfo, + build_name: str, + build_digest: str, + job_report: JobReport, + s3: S3Helper, + s3_destination: str, +) -> str: + # There are ugly artifacts for the performance test. FIXME: + s3_performance_path = "/".join( + ( + get_release_or_pr(pr_info, get_version_from_repo())[1], + pr_info.sha, + CI_CONFIG.normalize_string(build_name), + "performance.tar.zst", + ) + ) + performance_urls = [] + assert job_report.build_dir_for_upload, "Must be set for build job" + performance_path = Path(job_report.build_dir_for_upload) / "performance.tar.zst" + if performance_path.exists(): + performance_urls.append( + s3.upload_build_file_to_s3(performance_path, s3_performance_path) + ) + print( + "Uploaded performance.tar.zst to %s, now delete to avoid duplication", + performance_urls[0], + ) + performance_path.unlink() + build_urls = ( + s3.upload_build_directory_to_s3( + Path(job_report.build_dir_for_upload), + s3_destination, + keep_dirs_in_s3_path=False, + upload_symlinks=False, + ) + + performance_urls + ) + print("::notice ::Build URLs: {}".format("\n".join(build_urls))) + log_path = Path(job_report.additional_files[0]) + log_url = "" + if log_path.exists(): + log_url = s3.upload_build_file_to_s3( + log_path, s3_destination + "/" + log_path.name + ) + print(f"::notice ::Log URL: {log_url}") + + # generate and upload build report + build_result = BuildResult( + build_name, + log_url, + build_urls, + job_report.version, + job_report.status, + int(job_report.duration), + GITHUB_JOB_API_URL(), + head_ref=pr_info.head_ref, + pr_number=pr_info.number, + ) + result_json_path = build_result.write_json() + s3_path = get_s3_path(build_digest) + result_json_path.name + build_report_url = s3.upload_file( + bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path + ) + print(f"Report file [{result_json_path}] has been uploaded to [{build_report_url}]") + + # Upload head master binaries + static_bin_name = CI_CONFIG.build_config[build_name].static_binary_name + if pr_info.is_master() and static_bin_name: + # Full binary with debug info: + s3_path_full = "/".join((pr_info.base_ref, static_bin_name, "clickhouse-full")) + binary_full = Path(job_report.build_dir_for_upload) / "clickhouse" + url_full = s3.upload_build_file_to_s3(binary_full, s3_path_full) + 
print(f"::notice ::Binary static URL (with debug info): {url_full}") + + # Stripped binary without debug info: + s3_path_compact = "/".join((pr_info.base_ref, static_bin_name, "clickhouse")) + binary_compact = Path(job_report.build_dir_for_upload) / "clickhouse-stripped" + url_compact = s3.upload_build_file_to_s3(binary_compact, s3_path_compact) + print(f"::notice ::Binary static URL (compact): {url_compact}") + + return log_url + + +def _upload_build_profile_data( + pr_info: PRInfo, + build_name: str, + job_report: JobReport, + git_runner: GitRunner, + ch_helper: ClickHouseHelper, +) -> None: + ci_logs_credentials = CiLogsCredentials(Path("/dev/null")) + if ci_logs_credentials.host: + instance_type = get_instance_type() + instance_id = get_instance_id() + query = f"""INSERT INTO build_time_trace + ( + pull_request_number, + commit_sha, + check_start_time, + check_name, + instance_type, + instance_id, + file, + library, + time, + pid, + tid, + ph, + ts, + dur, + cat, + name, + detail, + count, + avgMs, + args_name + ) + SELECT {pr_info.number}, '{pr_info.sha}', '{job_report.start_time}', '{build_name}', '{instance_type}', '{instance_id}', * + FROM input(' + file String, + library String, + time DateTime64(6), + pid UInt32, + tid UInt32, + ph String, + ts UInt64, + dur UInt64, + cat String, + name String, + detail String, + count UInt64, + avgMs UInt64, + args_name String') + FORMAT JSONCompactEachRow""" + + auth = { + "X-ClickHouse-User": "ci", + "X-ClickHouse-Key": ci_logs_credentials.password, + } + url = f"https://{ci_logs_credentials.host}/" + profiles_dir = Path(TEMP_PATH) / "profiles_source" + profiles_dir.mkdir(parents=True, exist_ok=True) + print( + "Processing profile JSON files from %s", + Path(REPO_COPY) / "build_docker", + ) + git_runner( + "./utils/prepare-time-trace/prepare-time-trace.sh " + f"build_docker {profiles_dir.absolute()}" + ) + profile_data_file = Path(TEMP_PATH) / "profile.json" + with open(profile_data_file, "wb") as profile_fd: + for profile_source in profiles_dir.iterdir(): + if profile_source.name != "binary_sizes.txt": + with open(profiles_dir / profile_source, "rb") as ps_fd: + profile_fd.write(ps_fd.read()) + + print( + "::notice ::Log Uploading profile data, path: %s, size: %s, query: %s", + profile_data_file, + profile_data_file.stat().st_size, + query, + ) + ch_helper.insert_file(url, auth, query, profile_data_file) + + query = f"""INSERT INTO binary_sizes + ( + pull_request_number, + commit_sha, + check_start_time, + check_name, + instance_type, + instance_id, + file, + size + ) + SELECT {pr_info.number}, '{pr_info.sha}', '{job_report.start_time}', '{build_name}', '{instance_type}', '{instance_id}', file, size + FROM input('size UInt64, file String') + SETTINGS format_regexp = '^\\s*(\\d+) (.+)$' + FORMAT Regexp""" + + binary_sizes_file = profiles_dir / "binary_sizes.txt" + + print( + "::notice ::Log Uploading binary sizes data, path: %s, size: %s, query: %s", + binary_sizes_file, + binary_sizes_file.stat().st_size, + query, + ) + ch_helper.insert_file(url, auth, query, binary_sizes_file) + + +def _run_test(job_name: str, run_command: str) -> int: + assert ( + run_command or CI_CONFIG.get_job_config(job_name).run_command + ), "Run command must be provided as input argument or be configured in job config" + + if not run_command: + if CI_CONFIG.get_job_config(job_name).timeout: + os.environ["KILL_TIMEOUT"] = str(CI_CONFIG.get_job_config(job_name).timeout) + run_command = "/".join( + (os.path.dirname(__file__), 
CI_CONFIG.get_job_config(job_name).run_command) + ) + if ".py" in run_command and not run_command.startswith("python"): + run_command = "python3 " + run_command + print("Use run command from a job config") + else: + print("Use run command from the workflow") + os.environ["CHECK_NAME"] = job_name + print(f"Going to start run command [{run_command}]") + process = subprocess.run( + run_command, + stdout=sys.stdout, + stderr=sys.stderr, + text=True, + check=False, + shell=True, + ) + + if process.returncode == 0: + print(f"Run action done for: [{job_name}]") + exit_code = 0 + else: + print( + f"Run action failed for: [{job_name}] with exit code [{process.returncode}]" + ) + exit_code = process.returncode + return exit_code + + +def _get_ext_check_name(check_name: str) -> str: + run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) + run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0")) + if run_by_hash_total > 1: + check_name_with_group = ( + check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" + ) + else: + check_name_with_group = check_name + return check_name_with_group + + def main() -> int: + logging.basicConfig(level=logging.INFO) exit_code = 0 parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) args = parse_args(parser) - if args.mark_success or args.pre or args.post or args.run: + if args.mark_success or args.pre or args.run: assert args.infile, "Run config must be provided via --infile" assert args.job_name, "Job name must be provided via --job-name" @@ -561,26 +847,34 @@ def main() -> int: result: Dict[str, Any] = {} s3 = S3Helper() + pr_info = PRInfo() + git_runner = GitRunner(set_cwd_to_git_root=True) + ### CONFIGURE action: start if args.configure: - GR = GitRunner() - pr_info = PRInfo() - docker_data = {} - git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD") + git_ref = git_runner.run(f"{GIT_PREFIX} rev-parse HEAD") - # if '#no-merge-commit' is set in commit message - set git ref to PR branch head to avoid merge-commit + # if '#no_merge_commit' is set in commit message - set git ref to PR branch head to avoid merge-commit tokens = [] - if pr_info.number != 0 and not args.skip_jobs: - message = GR.run(f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1") + ci_flags = { + Labels.NO_MERGE_COMMIT: False, + Labels.NO_CI_CACHE: False, + } + if (pr_info.number != 0 and not args.skip_jobs) or args.commit_message: + message = args.commit_message or git_runner.run( + f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1" + ) tokens = _fetch_commit_tokens(message) - print(f"Found commit message tokens: [{tokens}]") - if "#no-merge-commit" in tokens and CI: - GR.run(f"{GIT_PREFIX} checkout {pr_info.sha}") - git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD") - print( - "#no-merge-commit is set in commit message - Setting git ref to PR branch HEAD to not use merge commit" - ) + print(f"Commit message tokens: [{tokens}]") + if Labels.NO_MERGE_COMMIT in tokens and CI: + git_runner.run(f"{GIT_PREFIX} checkout {pr_info.sha}") + git_ref = git_runner.run(f"{GIT_PREFIX} rev-parse HEAD") + ci_flags[Labels.NO_MERGE_COMMIT] = True + print("NOTE: Disable Merge Commit") + if Labels.NO_CI_CACHE in tokens: + ci_flags[Labels.NO_CI_CACHE] = True + print("NOTE: Disable CI Cache") # let's get CH version version = get_version_from_repo(git=Git(True)).string @@ -607,9 +901,11 @@ def main() -> int: docs_digest, job_digester, s3, - args.rebuild_all_binaries, + # FIXME: add suport for master wf w/o rebuilds + args.rebuild_all_binaries or pr_info.is_master(), 
pr_info.labels, tokens, + ci_flags[Labels.NO_CI_CACHE], ) if not args.skip_jobs else {} @@ -620,6 +916,7 @@ def main() -> int: result["version"] = version result["build"] = build_digest result["docs"] = docs_digest + result["ci_flags"] = ci_flags result["jobs_data"] = jobs_data result["docker_data"] = docker_data if pr_info.number != 0 and not args.docker_digest_or_latest: @@ -628,82 +925,191 @@ def main() -> int: _check_and_update_for_early_style_check(result) if pr_info.has_changes_in_documentation_only(): _update_config_for_docs_only(result) + ### CONFIGURE action: end - elif args.update_gh_statuses: - assert indata, "Run config must be provided via --infile" - _update_gh_statuses(indata=indata, s3=s3) - + ### PRE action: start elif args.pre: - # remove job status file if any CommitStatusData.cleanup() + JobReport.cleanup() + BuildResult.cleanup() - if is_test_job(args.job_name): - assert indata, "Run config must be provided via --infile" - report_path = Path(REPORT_PATH) - report_path.mkdir(exist_ok=True, parents=True) - path = get_s3_path(indata["build"]) - files = s3.download_files( # type: ignore - bucket=S3_BUILDS_BUCKET, - s3_path=path, - file_suffix=".json", - local_directory=report_path, - ) - print( - f"Pre action done. Report files [{files}] have been downloaded from [{path}] to [{report_path}]" - ) - else: - print(f"Pre action done. Nothing to do for [{args.job_name}]") + assert indata, "Run config must be provided via --infile" + report_path = Path(REPORT_PATH) + report_path.mkdir(exist_ok=True, parents=True) + path = get_s3_path(indata["build"]) + reports_files = s3.download_files( # type: ignore + bucket=S3_BUILDS_BUCKET, + s3_path=path, + file_suffix=".json", + local_directory=report_path, + ) + # for release/master branches reports must be created on the same branches + files = [] + if pr_info.number == 0: + for file in reports_files: + if pr_info.head_ref not in file: + # keep reports from the same branch only, if not in a PR + (report_path / file).unlink() + print(f"drop report: [{report_path / file}]") + else: + files.append(file) + reports_files = files + print( + f"Pre action done. 
Report files [{reports_files}] have been downloaded from [{path}] to [{report_path}]" + ) + ### PRE action: end + ### RUN action: start elif args.run: - assert CI_CONFIG.get_job_config( - args.job_name - ).run_command, f"Run command must be configured in CI_CONFIG for [{args.job_name}] or in GH workflow" - if CI_CONFIG.get_job_config(args.job_name).timeout: - os.environ["KILL_TIMEOUT"] = str( - CI_CONFIG.get_job_config(args.job_name).timeout + assert indata + check_name = args.job_name + check_name_with_group = _get_ext_check_name(check_name) + print( + f"Check if rerun for name: [{check_name}], extended name [{check_name_with_group}]" + ) + previous_status = None + if CI_CONFIG.is_build_job(check_name): + # this is a build job - check if build report is present + build_result = ( + BuildResult.load_any(check_name, pr_info.number, pr_info.head_ref) + if not indata["ci_flags"][Labels.NO_CI_CACHE] + else None ) - os.environ["CHECK_NAME"] = args.job_name - run_command = ( - "./tests/ci/" + CI_CONFIG.get_job_config(args.job_name).run_command - ) - if ".py" in run_command: - run_command = "python3 " + run_command - print(f"Going to start run command [{run_command}]") - process = subprocess.run( - run_command, - stdout=sys.stdout, - stderr=sys.stderr, - text=True, - check=False, - shell=True, - ) - if process.returncode == 0: - print(f"Run action done for: [{args.job_name}]") + if build_result: + if build_result.status == SUCCESS: + previous_status = build_result.status + else: + # FIXME: Consider reusing failures for build jobs. + # Just remove this if/else - that makes build job starting and failing immediately + print( + "Build report found but status is unsuccessful - will try to rerun" + ) + print("::group::Build Report") + print(build_result.as_json()) + print("::endgroup::") else: - print( - f"Run action failed for: [{args.job_name}] with exit code [{process.returncode}]" + # this is a test job - check if GH commit status is present + commit = get_commit( + Github(get_best_robot_token(), per_page=100), pr_info.sha ) - exit_code = process.returncode + rerun_helper = RerunHelper(commit, check_name_with_group) + if rerun_helper.is_already_finished_by_status(): + status = rerun_helper.get_finished_status() + assert status + previous_status = status.state + print("::group::Commit Status") + print(status) + print("::endgroup::") + if previous_status: + print( + f"Commit status or Build Report is already present - job will be skipped with status: [{previous_status}]" + ) + if previous_status == SUCCESS: + exit_code = 0 + else: + exit_code = 1 + else: + exit_code = _run_test(check_name, args.run_command) + ### RUN action: end + + ### POST action: start elif args.post: - if is_build_job(args.job_name): - report_path = Path(TEMP_PATH) # build-check.py stores report in TEMP_PATH - assert report_path.is_dir(), f"File [{report_path}] is not a dir" - files = list(report_path.glob(f"*{args.job_name}.json")) # type: ignore[arg-type] - assert len(files) == 1, f"Which is the report file: {files}?" 
- local_report = f"{files[0]}" - report_name = BuildResult.get_report_name(args.job_name) - assert indata - s3_path = Path(get_s3_path(indata["build"])) / report_name - report_url = s3.upload_file( - bucket=S3_BUILDS_BUCKET, file_path=local_report, s3_path=s3_path + assert ( + not CI_CONFIG.is_build_job(args.job_name) or indata + ), "--infile with config must be provided for POST action of a build type job [{args.job_name}]" + job_report = JobReport.load() if JobReport.exist() else None + if job_report: + ch_helper = ClickHouseHelper() + check_url = "" + + if CI_CONFIG.is_build_job(args.job_name): + build_name = args.job_name + s3_path_prefix = "/".join( + ( + get_release_or_pr(pr_info, get_version_from_repo())[0], + pr_info.sha, + build_name, + ) + ) + log_url = _upload_build_artifacts( + pr_info, + build_name, + build_digest=indata["build"], # type: ignore + job_report=job_report, + s3=s3, + s3_destination=s3_path_prefix, + ) + _upload_build_profile_data( + pr_info, build_name, job_report, git_runner, ch_helper + ) + check_url = log_url + else: + # test job + additional_urls = [] + s3_path_prefix = "/".join( + ( + get_release_or_pr(pr_info, get_version_from_repo())[0], + pr_info.sha, + CI_CONFIG.normalize_string( + job_report.check_name or _get_ext_check_name(args.job_name) + ), + ) + ) + if job_report.build_dir_for_upload: + additional_urls = s3.upload_build_directory_to_s3( + Path(job_report.build_dir_for_upload), + s3_path_prefix, + keep_dirs_in_s3_path=False, + upload_symlinks=False, + ) + if job_report.test_results or job_report.additional_files: + check_url = upload_result_helper.upload_results( + s3, + pr_info.number, + pr_info.sha, + job_report.test_results, + job_report.additional_files, + job_report.check_name or args.job_name, + additional_urls=additional_urls or None, + ) + commit = get_commit( + Github(get_best_robot_token(), per_page=100), pr_info.sha + ) + post_commit_status( + commit, + job_report.status, + check_url, + format_description(job_report.description), + job_report.check_name or args.job_name, + pr_info, + dump_to_file=True, + ) + update_mergeable_check( + commit, + pr_info, + job_report.check_name or _get_ext_check_name(args.job_name), + ) + + print(f"Job report url: [{check_url}]") + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + job_report.test_results, + job_report.status, + job_report.duration, + job_report.start_time, + check_url or "", + job_report.check_name or args.job_name, ) - print( - f"Post action done. Report file [{local_report}] has been uploaded to [{report_url}]" + ch_helper.insert_events_into( + db="default", table="checks", events=prepared_events ) else: - print(f"Post action done. Nothing to do for [{args.job_name}]") + # no job report + print(f"No job report for {[args.job_name]} - do nothing") + ### POST action: end + ### MARK SUCCESS action: start elif args.mark_success: assert indata, "Run config must be provided via --infile" job = args.job_name @@ -715,7 +1121,7 @@ def main() -> int: # FIXME: find generic design for propagating and handling job status (e.g. 
stop using statuses in GH api) # now job ca be build job w/o status data, any other job that exit with 0 with or w/o status data - if is_build_job(job): + if CI_CONFIG.is_build_job(job): # there is no status for build jobs # create dummy success to mark it as done job_status = CommitStatusData( @@ -742,7 +1148,7 @@ def main() -> int: success_flag_name = get_file_flag_name( job, indata["jobs_data"]["digests"][job], args.batch, num_batches ) - if not is_docs_job(job): + if not CI_CONFIG.is_docs_job(job): path = get_s3_path(indata["build"]) + success_flag_name else: path = get_s3_path_docs(indata["docs"]) + success_flag_name @@ -756,8 +1162,15 @@ def main() -> int: ) else: print(f"Job [{job}] is not ok, status [{job_status.status}]") + ### MARK SUCCESS action: end - # print results + ### UPDATE GH STATUSES action: start + elif args.update_gh_statuses: + assert indata, "Run config must be provided via --infile" + _update_gh_statuses(indata=indata, s3=s3) + ### UPDATE GH STATUSES action: end + + ### print results if args.outfile: with open(args.outfile, "w") as f: if isinstance(result, str): @@ -773,10 +1186,8 @@ def main() -> int: print(json.dumps(result, indent=2 if args.pretty else None)) else: raise AssertionError(f"Unexpected type for 'res': {type(result)}") - return exit_code if __name__ == "__main__": - os.chdir(ROOT_DIR) sys.exit(main()) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 8365910425c..89133f6cb79 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -3,15 +3,139 @@ import logging from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from dataclasses import dataclass, field -from enum import Enum from pathlib import Path from typing import Callable, Dict, Iterable, List, Literal, Optional, Union from integration_test_images import IMAGES +from ci_utils import WithIter -class Labels(Enum): - DO_NOT_TEST_LABEL = "do not test" +class Labels(metaclass=WithIter): + DO_NOT_TEST_LABEL = "do_not_test" + NO_MERGE_COMMIT = "no_merge_commit" + NO_CI_CACHE = "no_ci_cache" + CI_SET_REDUCED = "ci_set_reduced" + + +class Build(metaclass=WithIter): + PACKAGE_RELEASE = "package_release" + PACKAGE_AARCH64 = "package_aarch64" + PACKAGE_ASAN = "package_asan" + PACKAGE_UBSAN = "package_ubsan" + PACKAGE_TSAN = "package_tsan" + PACKAGE_MSAN = "package_msan" + PACKAGE_DEBUG = "package_debug" + BINARY_RELEASE = "binary_release" + BINARY_TIDY = "binary_tidy" + BINARY_DARWIN = "binary_darwin" + BINARY_AARCH64 = "binary_aarch64" + BINARY_AARCH64_V80COMPAT = "binary_aarch64_v80compat" + BINARY_FREEBSD = "binary_freebsd" + BINARY_DARWIN_AARCH64 = "binary_darwin_aarch64" + BINARY_PPC64LE = "binary_ppc64le" + BINARY_AMD64_COMPAT = "binary_amd64_compat" + BINARY_AMD64_MUSL = "binary_amd64_musl" + BINARY_RISCV64 = "binary_riscv64" + BINARY_S390X = "binary_s390x" + FUZZERS = "fuzzers" + + +class JobNames(metaclass=WithIter): + STYLE_CHECK = "Style check" + FAST_TEST = "Fast tests" + DOCKER_SERVER = "Docker server and keeper images" + INSTALL_TEST_AMD = "Install packages (amd64)" + INSTALL_TEST_ARM = "Install packages (arm64)" + + STATELESS_TEST_DEBUG = "Stateless tests (debug)" + STATELESS_TEST_RELEASE = "Stateless tests (release)" + STATELESS_TEST_AARCH64 = "Stateless tests (aarch64)" + STATELESS_TEST_ASAN = "Stateless tests (asan)" + STATELESS_TEST_TSAN = "Stateless tests (tsan)" + STATELESS_TEST_MSAN = "Stateless tests (msan)" + STATELESS_TEST_UBSAN = "Stateless tests (ubsan)" + STATELESS_TEST_ANALYZER_RELEASE = "Stateless tests (release, analyzer)" + 
STATELESS_TEST_DB_REPL_RELEASE = "Stateless tests (release, DatabaseReplicated)" + STATELESS_TEST_S3_RELEASE = "Stateless tests (release, s3 storage)" + STATELESS_TEST_S3_DEBUG = "Stateless tests (debug, s3 storage)" + STATELESS_TEST_S3_TSAN = "Stateless tests (tsan, s3 storage)" + STATELESS_TEST_FLAKY_ASAN = "Stateless tests flaky check (asan)" + + STATEFUL_TEST_DEBUG = "Stateful tests (debug)" + STATEFUL_TEST_RELEASE = "Stateful tests (release)" + STATEFUL_TEST_AARCH64 = "Stateful tests (aarch64)" + STATEFUL_TEST_ASAN = "Stateful tests (asan)" + STATEFUL_TEST_TSAN = "Stateful tests (tsan)" + STATEFUL_TEST_MSAN = "Stateful tests (msan)" + STATEFUL_TEST_UBSAN = "Stateful tests (ubsan)" + STATEFUL_TEST_PARALLEL_REPL_RELEASE = "Stateful tests (release, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_DEBUG = "Stateful tests (debug, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_ASAN = "Stateful tests (asan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_MSAN = "Stateful tests (msan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_UBSAN = "Stateful tests (ubsan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_TSAN = "Stateful tests (tsan, ParallelReplicas)" + + STRESS_TEST_ASAN = "Stress test (asan)" + STRESS_TEST_TSAN = "Stress test (tsan)" + STRESS_TEST_UBSAN = "Stress test (ubsan)" + STRESS_TEST_MSAN = "Stress test (msan)" + STRESS_TEST_DEBUG = "Stress test (debug)" + + INTEGRATION_TEST = "Integration tests (release)" + INTEGRATION_TEST_ASAN = "Integration tests (asan)" + INTEGRATION_TEST_ASAN_ANALYZER = "Integration tests (asan, analyzer)" + INTEGRATION_TEST_TSAN = "Integration tests (tsan)" + INTEGRATION_TEST_FLAKY = "Integration tests flaky check (asan)" + + UPGRADE_TEST_DEBUG = "Upgrade check (debug)" + UPGRADE_TEST_ASAN = "Upgrade check (asan)" + UPGRADE_TEST_TSAN = "Upgrade check (tsan)" + UPGRADE_TEST_MSAN = "Upgrade check (msan)" + + UNIT_TEST = "Unit tests (release)" + UNIT_TEST_ASAN = "Unit tests (asan)" + UNIT_TEST_MSAN = "Unit tests (msan)" + UNIT_TEST_TSAN = "Unit tests (tsan)" + UNIT_TEST_UBSAN = "Unit tests (ubsan)" + + AST_FUZZER_TEST_DEBUG = "AST fuzzer (debug)" + AST_FUZZER_TEST_ASAN = "AST fuzzer (asan)" + AST_FUZZER_TEST_MSAN = "AST fuzzer (msan)" + AST_FUZZER_TEST_TSAN = "AST fuzzer (tsan)" + AST_FUZZER_TEST_UBSAN = "AST fuzzer (ubsan)" + + JEPSEN_KEEPER = "ClickHouse Keeper Jepsen" + JEPSEN_SERVER = "ClickHouse Server Jepsen" + + PERFORMANCE_TEST_AMD64 = "Performance Comparison" + PERFORMANCE_TEST_ARM64 = "Performance Comparison Aarch64" + + SQL_LANCER_TEST = "SQLancer (release)" + SQL_LOGIC_TEST = "Sqllogic test (release)" + + SQLANCER = "SQLancer (release)" + SQLANCER_DEBUG = "SQLancer (debug)" + SQLTEST = "SQLTest" + + COMPATIBILITY_TEST = "Compatibility check (amd64)" + COMPATIBILITY_TEST_ARM = "Compatibility check (aarch64)" + + CLCIKBENCH_TEST = "ClickBench (amd64)" + CLCIKBENCH_TEST_ARM = "ClickBench (aarch64)" + + LIBFUZZER_TEST = "libFuzzer tests" + + BUILD_CHECK = "ClickHouse build check" + BUILD_CHECK_SPECIAL = "ClickHouse special build check" + + DOCS_CHECK = "Docs check" + BUGFIX_VALIDATE = "tests bugfix validate check" + + +# dynamically update JobName with Build jobs +for attr_name in dir(Build): + if not attr_name.startswith("__") and not callable(getattr(Build, attr_name)): + setattr(JobNames, attr_name, getattr(Build, attr_name)) @dataclass @@ -31,7 +155,7 @@ class DigestConfig: @dataclass class LabelConfig: """ - class to configure different CI scenarious per GH label + class to configure different CI scenarious per GH label or commit message 
token """ run_jobs: Iterable[str] = frozenset() @@ -94,6 +218,7 @@ class BuildConfig: docker=["clickhouse/binary-builder"], git_submodules=True, ), + run_command="build_check.py $BUILD_NAME", ) ) @@ -111,7 +236,16 @@ class BuildConfig: @dataclass class BuildReportConfig: builds: List[str] - job_config: JobConfig = field(default_factory=JobConfig) + job_config: JobConfig = field( + default_factory=lambda: JobConfig( + digest=DigestConfig( + include_paths=[ + "./tests/ci/build_report_check.py", + "./tests/ci/upload_result_helper.py", + ], + ), + ) + ) @dataclass @@ -290,7 +424,7 @@ class CiConfig: def get_label_config(self, label_name: str) -> Optional[LabelConfig]: for label, config in self.label_configs.items(): - if label_name == label: + if self.normalize_string(label_name) == self.normalize_string(label): return config return None @@ -310,20 +444,21 @@ class CiConfig: ), f"Invalid check_name or CI_CONFIG outdated, config not found for [{check_name}]" return res # type: ignore - def get_job_with_parents(self, check_name: str) -> List[str]: - def _normalize_string(input_string: str) -> str: - lowercase_string = input_string.lower() - normalized_string = ( - lowercase_string.replace(" ", "_") - .replace("-", "_") - .replace("(", "") - .replace(")", "") - .replace(",", "") - ) - return normalized_string + @staticmethod + def normalize_string(input_string: str) -> str: + lowercase_string = input_string.lower() + normalized_string = ( + lowercase_string.replace(" ", "_") + .replace("-", "_") + .replace("(", "") + .replace(")", "") + .replace(",", "") + ) + return normalized_string + def get_job_with_parents(self, check_name: str) -> List[str]: res = [] - check_name = _normalize_string(check_name) + check_name = self.normalize_string(check_name) for config in ( self.build_config, @@ -332,18 +467,18 @@ class CiConfig: self.other_jobs_configs, ): for job_name in config: # type: ignore - if check_name == _normalize_string(job_name): + if check_name == self.normalize_string(job_name): res.append(job_name) if isinstance(config[job_name], TestConfig): # type: ignore - assert config[ - job_name - ].required_build, f"Error: Experimantal feature... 
Not supported job [{job_name}]" # type: ignore - res.append(config[job_name].required_build) # type: ignore - res.append("Fast tests") - res.append("Style check") + if config[job_name].required_build: # type: ignore + res.append(config[job_name].required_build) # type: ignore elif isinstance(config[job_name], BuildConfig): # type: ignore - res.append("Fast tests") - res.append("Style check") + pass + elif isinstance(config[job_name], BuildReportConfig): # type: ignore + # add all build jobs as parents for build report check + res.extend( + [job for job in JobNames if job in self.build_config] + ) else: assert ( False @@ -385,6 +520,22 @@ class CiConfig: def get_builds_for_report(self, report_name: str) -> List[str]: return self.builds_report_config[report_name].builds + @classmethod + def is_build_job(cls, job: str) -> bool: + return job in Build + + @classmethod + def is_test_job(cls, job: str) -> bool: + return ( + not cls.is_build_job(job) + and not cls.is_build_job(job) + and job != JobNames.STYLE_CHECK + ) + + @classmethod + def is_docs_job(cls, job: str) -> bool: + return job != JobNames.DOCS_CHECK + def validate(self) -> None: errors = [] for name, build_config in self.build_config.items(): @@ -443,61 +594,78 @@ class CiConfig: CI_CONFIG = CiConfig( label_configs={ - Labels.DO_NOT_TEST_LABEL.value: LabelConfig(run_jobs=["Style check"]), + Labels.DO_NOT_TEST_LABEL: LabelConfig(run_jobs=[JobNames.STYLE_CHECK]), + Labels.CI_SET_REDUCED: LabelConfig( + run_jobs=[ + job + for job in JobNames + if not any( + [ + nogo in job + for nogo in ( + "asan", + "tsan", + "msan", + "ubsan", + ) + ] + ) + ] + ), }, build_config={ - "package_release": BuildConfig( - name="package_release", + Build.PACKAGE_RELEASE: BuildConfig( + name=Build.PACKAGE_RELEASE, compiler="clang-17", package_type="deb", static_binary_name="amd64", additional_pkgs=True, ), - "package_aarch64": BuildConfig( - name="package_aarch64", + Build.PACKAGE_AARCH64: BuildConfig( + name=Build.PACKAGE_AARCH64, compiler="clang-17-aarch64", package_type="deb", static_binary_name="aarch64", additional_pkgs=True, ), - "package_asan": BuildConfig( - name="package_asan", + Build.PACKAGE_ASAN: BuildConfig( + name=Build.PACKAGE_ASAN, compiler="clang-17", sanitizer="address", package_type="deb", ), - "package_ubsan": BuildConfig( - name="package_ubsan", + Build.PACKAGE_UBSAN: BuildConfig( + name=Build.PACKAGE_UBSAN, compiler="clang-17", sanitizer="undefined", package_type="deb", ), - "package_tsan": BuildConfig( - name="package_tsan", + Build.PACKAGE_TSAN: BuildConfig( + name=Build.PACKAGE_TSAN, compiler="clang-17", sanitizer="thread", package_type="deb", ), - "package_msan": BuildConfig( - name="package_msan", + Build.PACKAGE_MSAN: BuildConfig( + name=Build.PACKAGE_MSAN, compiler="clang-17", sanitizer="memory", package_type="deb", ), - "package_debug": BuildConfig( - name="package_debug", + Build.PACKAGE_DEBUG: BuildConfig( + name=Build.PACKAGE_DEBUG, compiler="clang-17", debug_build=True, package_type="deb", sparse_checkout=True, # Check that it works with at least one build, see also update-submodules.sh ), - "binary_release": BuildConfig( - name="binary_release", + Build.BINARY_RELEASE: BuildConfig( + name=Build.BINARY_RELEASE, compiler="clang-17", package_type="binary", ), - "binary_tidy": BuildConfig( - name="binary_tidy", + Build.BINARY_TIDY: BuildConfig( + name=Build.BINARY_TIDY, compiler="clang-17", debug_build=True, package_type="binary", @@ -505,107 +673,107 @@ CI_CONFIG = CiConfig( tidy=True, comment="clang-tidy is used for static 
analysis", ), - "binary_darwin": BuildConfig( - name="binary_darwin", + Build.BINARY_DARWIN: BuildConfig( + name=Build.BINARY_DARWIN, compiler="clang-17-darwin", package_type="binary", static_binary_name="macos", sparse_checkout=True, # Check that it works with at least one build, see also update-submodules.sh ), - "binary_aarch64": BuildConfig( - name="binary_aarch64", + Build.BINARY_AARCH64: BuildConfig( + name=Build.BINARY_AARCH64, compiler="clang-17-aarch64", package_type="binary", ), - "binary_aarch64_v80compat": BuildConfig( - name="binary_aarch64_v80compat", + Build.BINARY_AARCH64_V80COMPAT: BuildConfig( + name=Build.BINARY_AARCH64_V80COMPAT, compiler="clang-17-aarch64-v80compat", package_type="binary", static_binary_name="aarch64v80compat", comment="For ARMv8.1 and older", ), - "binary_freebsd": BuildConfig( - name="binary_freebsd", + Build.BINARY_FREEBSD: BuildConfig( + name=Build.BINARY_FREEBSD, compiler="clang-17-freebsd", package_type="binary", static_binary_name="freebsd", ), - "binary_darwin_aarch64": BuildConfig( - name="binary_darwin_aarch64", + Build.BINARY_DARWIN_AARCH64: BuildConfig( + name=Build.BINARY_DARWIN_AARCH64, compiler="clang-17-darwin-aarch64", package_type="binary", static_binary_name="macos-aarch64", ), - "binary_ppc64le": BuildConfig( - name="binary_ppc64le", + Build.BINARY_PPC64LE: BuildConfig( + name=Build.BINARY_PPC64LE, compiler="clang-17-ppc64le", package_type="binary", static_binary_name="powerpc64le", ), - "binary_amd64_compat": BuildConfig( - name="binary_amd64_compat", + Build.BINARY_AMD64_COMPAT: BuildConfig( + name=Build.BINARY_AMD64_COMPAT, compiler="clang-17-amd64-compat", package_type="binary", static_binary_name="amd64compat", comment="SSE2-only build", ), - "binary_amd64_musl": BuildConfig( - name="binary_amd64_musl", + Build.BINARY_AMD64_MUSL: BuildConfig( + name=Build.BINARY_AMD64_MUSL, compiler="clang-17-amd64-musl", package_type="binary", static_binary_name="amd64musl", comment="Build with Musl", ), - "binary_riscv64": BuildConfig( - name="binary_riscv64", + Build.BINARY_RISCV64: BuildConfig( + name=Build.BINARY_RISCV64, compiler="clang-17-riscv64", package_type="binary", static_binary_name="riscv64", ), - "binary_s390x": BuildConfig( - name="binary_s390x", + Build.BINARY_S390X: BuildConfig( + name=Build.BINARY_S390X, compiler="clang-17-s390x", package_type="binary", static_binary_name="s390x", ), - "fuzzers": BuildConfig( - name="fuzzers", + Build.FUZZERS: BuildConfig( + name=Build.FUZZERS, compiler="clang-17", package_type="fuzzers", ), }, builds_report_config={ - "ClickHouse build check": BuildReportConfig( + JobNames.BUILD_CHECK: BuildReportConfig( builds=[ - "package_release", - "package_aarch64", - "package_asan", - "package_ubsan", - "package_tsan", - "package_msan", - "package_debug", - "binary_release", - "fuzzers", + Build.PACKAGE_RELEASE, + Build.PACKAGE_AARCH64, + Build.PACKAGE_ASAN, + Build.PACKAGE_UBSAN, + Build.PACKAGE_TSAN, + Build.PACKAGE_MSAN, + Build.PACKAGE_DEBUG, + Build.BINARY_RELEASE, + Build.FUZZERS, ] ), - "ClickHouse special build check": BuildReportConfig( + JobNames.BUILD_CHECK_SPECIAL: BuildReportConfig( builds=[ - "binary_tidy", - "binary_darwin", - "binary_aarch64", - "binary_aarch64_v80compat", - "binary_freebsd", - "binary_darwin_aarch64", - "binary_ppc64le", - "binary_riscv64", - "binary_s390x", - "binary_amd64_compat", - "binary_amd64_musl", + Build.BINARY_TIDY, + Build.BINARY_DARWIN, + Build.BINARY_AARCH64, + Build.BINARY_AARCH64_V80COMPAT, + Build.BINARY_FREEBSD, + Build.BINARY_DARWIN_AARCH64, + 
Build.BINARY_PPC64LE, + Build.BINARY_RISCV64, + Build.BINARY_S390X, + Build.BINARY_AMD64_COMPAT, + Build.BINARY_AMD64_MUSL, ] ), }, other_jobs_configs={ - "Docker server and keeper images": TestConfig( + JobNames.DOCKER_SERVER: TestConfig( "", job_config=JobConfig( digest=DigestConfig( @@ -617,7 +785,7 @@ CI_CONFIG = CiConfig( ) ), ), - "Docs check": TestConfig( + JobNames.DOCS_CHECK: TestConfig( "", job_config=JobConfig( digest=DigestConfig( @@ -626,7 +794,7 @@ CI_CONFIG = CiConfig( ), ), ), - "Fast tests": TestConfig( + JobNames.FAST_TEST: TestConfig( "", job_config=JobConfig( digest=DigestConfig( @@ -636,246 +804,249 @@ CI_CONFIG = CiConfig( ) ), ), - "Style check": TestConfig( + JobNames.STYLE_CHECK: TestConfig( "", job_config=JobConfig( run_always=True, ), ), - "tests bugfix validate check": TestConfig( + JobNames.BUGFIX_VALIDATE: TestConfig( "", # we run this check by label - no digest required job_config=JobConfig(run_by_label="pr-bugfix"), ), }, test_configs={ - "Install packages (amd64)": TestConfig( - "package_release", job_config=JobConfig(digest=install_check_digest) + JobNames.INSTALL_TEST_AMD: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(digest=install_check_digest) ), - "Install packages (arm64)": TestConfig( - "package_aarch64", job_config=JobConfig(digest=install_check_digest) + JobNames.INSTALL_TEST_ARM: TestConfig( + Build.PACKAGE_AARCH64, job_config=JobConfig(digest=install_check_digest) ), - "Stateful tests (asan)": TestConfig( - "package_asan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (msan)": TestConfig( - "package_msan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (ubsan)": TestConfig( - "package_ubsan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (debug)": TestConfig( - "package_debug", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (release)": TestConfig( - "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (aarch64)": TestConfig( - "package_aarch64", job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - "Stateful tests (release, DatabaseOrdinary)": TestConfig( - "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_AARCH64: TestConfig( + Build.PACKAGE_AARCH64, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), + # FIXME: delete? 
+ # "Stateful tests (release, DatabaseOrdinary)": TestConfig( + # Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore + # ), # "Stateful tests (release, DatabaseReplicated)": TestConfig( - # "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore + # Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore # ), # Stateful tests for parallel replicas - "Stateful tests (release, ParallelReplicas)": TestConfig( - "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (debug, ParallelReplicas)": TestConfig( - "package_debug", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (asan, ParallelReplicas)": TestConfig( - "package_asan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (msan, ParallelReplicas)": TestConfig( - "package_msan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (ubsan, ParallelReplicas)": TestConfig( - "package_ubsan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (tsan, ParallelReplicas)": TestConfig( - "package_tsan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), # End stateful tests for parallel replicas - "Stateless tests (asan)": TestConfig( - "package_asan", + JobNames.STATELESS_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore ), - "Stateless tests (tsan)": TestConfig( - "package_tsan", + JobNames.STATELESS_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore ), - "Stateless tests (msan)": TestConfig( - "package_msan", + JobNames.STATELESS_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore ), - "Stateless tests (ubsan)": TestConfig( - "package_ubsan", + JobNames.STATELESS_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(num_batches=2, **statless_test_common_params), # type: ignore ), - "Stateless tests (debug)": TestConfig( - "package_debug", + JobNames.STATELESS_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore ), - "Stateless tests (release)": TestConfig( - "package_release", job_config=JobConfig(**statless_test_common_params) # type: ignore + JobNames.STATELESS_TEST_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, 
job_config=JobConfig(**statless_test_common_params) # type: ignore ), - "Stateless tests (aarch64)": TestConfig( - "package_aarch64", job_config=JobConfig(**statless_test_common_params) # type: ignore + JobNames.STATELESS_TEST_AARCH64: TestConfig( + Build.PACKAGE_AARCH64, job_config=JobConfig(**statless_test_common_params) # type: ignore ), - "Stateless tests (release, analyzer)": TestConfig( - "package_release", job_config=JobConfig(**statless_test_common_params) # type: ignore + JobNames.STATELESS_TEST_ANALYZER_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore ), - "Stateless tests (release, DatabaseOrdinary)": TestConfig( - "package_release", job_config=JobConfig(**statless_test_common_params) # type: ignore - ), - "Stateless tests (release, DatabaseReplicated)": TestConfig( - "package_release", + # delete? + # "Stateless tests (release, DatabaseOrdinary)": TestConfig( + # Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore + # ), + JobNames.STATELESS_TEST_DB_REPL_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore ), - "Stateless tests (release, s3 storage)": TestConfig( - "package_release", + JobNames.STATELESS_TEST_S3_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=2, **statless_test_common_params), # type: ignore ), - "Stateless tests (debug, s3 storage)": TestConfig( - "package_debug", + JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore ), - "Stateless tests (tsan, s3 storage)": TestConfig( - "package_tsan", + JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore ), - "Stress test (asan)": TestConfig( - "package_asan", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Stress test (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Stress test (ubsan)": TestConfig( - "package_ubsan", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Stress test (msan)": TestConfig( - "package_msan", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Stress test (debug)": TestConfig( - "package_debug", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Upgrade check (asan)": TestConfig( - "package_asan", job_config=JobConfig(**upgrade_test_common_params) # type: ignore + JobNames.UPGRADE_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore ), - "Upgrade check (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**upgrade_test_common_params) # type: ignore + 
JobNames.UPGRADE_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore ), - "Upgrade check (msan)": TestConfig( - "package_msan", job_config=JobConfig(**upgrade_test_common_params) # type: ignore + JobNames.UPGRADE_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore ), - "Upgrade check (debug)": TestConfig( - "package_debug", job_config=JobConfig(**upgrade_test_common_params) # type: ignore + JobNames.UPGRADE_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**upgrade_test_common_params) # type: ignore ), - "Integration tests (asan)": TestConfig( - "package_asan", + JobNames.INTEGRATION_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(num_batches=4, **integration_test_common_params), # type: ignore ), - "Integration tests (asan, analyzer)": TestConfig( - "package_asan", + JobNames.INTEGRATION_TEST_ASAN_ANALYZER: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore ), - "Integration tests (tsan)": TestConfig( - "package_tsan", + JobNames.INTEGRATION_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore ), # FIXME: currently no wf has this job. Try to enable - # "Integration tests (msan)": TestConfig("package_msan", job_config=JobConfig(num_batches=6, **integration_test_common_params) # type: ignore + # "Integration tests (msan)": TestConfig(Build.PACKAGE_MSAN, job_config=JobConfig(num_batches=6, **integration_test_common_params) # type: ignore # ), - "Integration tests (release)": TestConfig( - "package_release", + JobNames.INTEGRATION_TEST: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=4, **integration_test_common_params), # type: ignore ), - "Integration tests flaky check (asan)": TestConfig( - "package_asan", job_config=JobConfig(**integration_test_common_params) # type: ignore + JobNames.INTEGRATION_TEST_FLAKY: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**integration_test_common_params) # type: ignore ), - "Compatibility check (amd64)": TestConfig( - "package_release", job_config=JobConfig(digest=compatibility_check_digest) + JobNames.COMPATIBILITY_TEST: TestConfig( + Build.PACKAGE_RELEASE, + job_config=JobConfig(digest=compatibility_check_digest), ), - "Compatibility check (aarch64)": TestConfig( - "package_aarch64", job_config=JobConfig(digest=compatibility_check_digest) + JobNames.COMPATIBILITY_TEST_ARM: TestConfig( + Build.PACKAGE_AARCH64, + job_config=JobConfig(digest=compatibility_check_digest), ), - "Unit tests (release)": TestConfig( - "binary_release", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST: TestConfig( + Build.BINARY_RELEASE, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "Unit tests (asan)": TestConfig( - "package_asan", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "Unit tests (msan)": TestConfig( - "package_msan", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "Unit tests (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST_TSAN: 
TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "Unit tests (ubsan)": TestConfig( - "package_ubsan", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "AST fuzzer (debug)": TestConfig( - "package_debug", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "AST fuzzer (asan)": TestConfig( - "package_asan", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "AST fuzzer (msan)": TestConfig( - "package_msan", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "AST fuzzer (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "AST fuzzer (ubsan)": TestConfig( - "package_ubsan", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "Stateless tests flaky check (asan)": TestConfig( + JobNames.STATELESS_TEST_FLAKY_ASAN: TestConfig( # replace to non-default - "package_asan", + Build.PACKAGE_ASAN, job_config=JobConfig(**{**statless_test_common_params, "timeout": 3600}), # type: ignore ), - "ClickHouse Keeper Jepsen": TestConfig( - "binary_release", + JobNames.JEPSEN_KEEPER: TestConfig( + Build.BINARY_RELEASE, job_config=JobConfig( run_by_label="jepsen-test", run_command="jepsen_check.py keeper" ), ), - "ClickHouse Server Jepsen": TestConfig( - "binary_release", + JobNames.JEPSEN_SERVER: TestConfig( + Build.BINARY_RELEASE, job_config=JobConfig( run_by_label="jepsen-test", run_command="jepsen_check.py server" ), ), - "Performance Comparison": TestConfig( - "package_release", + JobNames.PERFORMANCE_TEST_AMD64: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=4, **perf_test_common_params), # type: ignore ), - "Performance Comparison Aarch64": TestConfig( - "package_aarch64", + JobNames.PERFORMANCE_TEST_ARM64: TestConfig( + Build.PACKAGE_AARCH64, job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore ), - "SQLancer (release)": TestConfig( - "package_release", job_config=JobConfig(**sqllancer_test_common_params) # type: ignore + JobNames.SQLANCER: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**sqllancer_test_common_params) # type: ignore ), - "SQLancer (debug)": TestConfig( - "package_debug", job_config=JobConfig(**sqllancer_test_common_params) # type: ignore + JobNames.SQLANCER_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**sqllancer_test_common_params) # type: ignore ), - "Sqllogic test (release)": TestConfig( - "package_release", job_config=JobConfig(**sqllogic_test_params) # type: ignore + JobNames.SQL_LOGIC_TEST: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**sqllogic_test_params) # 
type: ignore ), - "SQLTest": TestConfig( - "package_release", job_config=JobConfig(**sql_test_params) # type: ignore + JobNames.SQL_LOGIC_TEST: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**sql_test_params) # type: ignore ), - "ClickBench (amd64)": TestConfig("package_release"), - "ClickBench (aarch64)": TestConfig("package_aarch64"), - # FIXME: add digest and params - "libFuzzer tests": TestConfig("fuzzers"), # type: ignore + JobNames.CLCIKBENCH_TEST: TestConfig(Build.PACKAGE_RELEASE), + JobNames.CLCIKBENCH_TEST_ARM: TestConfig(Build.PACKAGE_AARCH64), + JobNames.LIBFUZZER_TEST: TestConfig(Build.FUZZERS), # type: ignore }, ) CI_CONFIG.validate() @@ -884,18 +1055,18 @@ CI_CONFIG.validate() # checks required by Mergeable Check REQUIRED_CHECKS = [ "PR Check", - "ClickHouse build check", - "ClickHouse special build check", - "Docs Check", - "Fast test", - "Stateful tests (release)", - "Stateless tests (release)", - "Style Check", - "Unit tests (asan)", - "Unit tests (msan)", - "Unit tests (release)", - "Unit tests (tsan)", - "Unit tests (ubsan)", + JobNames.BUILD_CHECK, + JobNames.BUILD_CHECK_SPECIAL, + JobNames.DOCS_CHECK, + JobNames.FAST_TEST, + JobNames.STATEFUL_TEST_RELEASE, + JobNames.STATELESS_TEST_RELEASE, + JobNames.STYLE_CHECK, + JobNames.UNIT_TEST_ASAN, + JobNames.UNIT_TEST_MSAN, + JobNames.UNIT_TEST, + JobNames.UNIT_TEST_TSAN, + JobNames.UNIT_TEST_UBSAN, ] diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py new file mode 100644 index 00000000000..3c267cff79d --- /dev/null +++ b/tests/ci/ci_utils.py @@ -0,0 +1,19 @@ +from contextlib import contextmanager +import os +from typing import Union, Iterator +from pathlib import Path + + +class WithIter(type): + def __iter__(cls): + return (v for k, v in cls.__dict__.items() if not k.startswith("_")) + + +@contextmanager +def cd(path: Union[Path, str]) -> Iterator[None]: + oldpwd = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(oldpwd) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index f9fadae4e03..72827929ff9 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -6,34 +6,22 @@ import logging import os import subprocess import sys -import atexit from pathlib import Path from typing import List, Tuple -from github import Github - from build_download_helper import download_all_deb_packages from clickhouse_helper import ( CiLogsCredentials, - ClickHouseHelper, - prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( - RerunHelper, - get_commit, override_status, - post_commit_status, - update_mergeable_check, ) from docker_images_helper import get_docker_image, pull_image, DockerImage from env_helper import TEMP_PATH, REPORT_PATH -from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo -from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results -from report import TestResults +from report import JobReport, TestResults def get_image_name() -> str: @@ -128,18 +116,8 @@ def main(): args = parse_args() check_name = args.check_name - gh = Github(get_best_robot_token(), per_page=100) - pr_info = PRInfo() - commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, commit, pr_info, check_name) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - image_name = get_image_name() 
docker_image = pull_image(get_docker_image(image_name)) @@ -186,39 +164,20 @@ def main(): logging.warning("Failed to change files owner in %s, ignoring it", temp_path) ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path) - s3_helper = S3Helper() state, description, test_results, additional_logs = process_results( result_path, server_log_path ) state = override_status(state, check_name) - ch_helper = ClickHouseHelper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - [run_log_path] + additional_logs, - check_name, - ) - - print(f"::notice:: {check_name} Report url: {report_url}") - post_commit_status( - commit, state, report_url, description, check_name, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[run_log_path] + additional_logs, + ).dump() if state != "success": if FORCE_TESTS_LABEL in pr_info.labels: diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index e3da81a54ad..a0c6294d8fd 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -8,22 +8,11 @@ import logging import subprocess import sys -from github import Github - from build_download_helper import download_builds_filter -from clickhouse_helper import ( - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import RerunHelper, get_commit, post_commit_status from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import TEMP_PATH, REPORT_PATH -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import S3Helper +from report import JobReport, TestResults, TestResult from stopwatch import Stopwatch -from upload_result_helper import upload_results IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" IMAGE_CENTOS = "clickhouse/test-old-centos" @@ -149,16 +138,6 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) reports_path.mkdir(parents=True, exist_ok=True) - pr_info = PRInfo() - - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, args.check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - packages_path = temp_path / "packages" packages_path.mkdir(parents=True, exist_ok=True) @@ -219,7 +198,6 @@ def main(): else: raise Exception("Can't determine max glibc version") - s3_helper = S3Helper() state, description, test_results, additional_logs = process_result( result_path, server_log_path, @@ -228,38 +206,14 @@ def main(): max_glibc_version, ) - ch_helper = ClickHouseHelper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - additional_logs, - args.check_name, - ) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, - state, - report_url, - description, - args.check_name, - pr_info, - dump_to_file=True, - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - 
test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - args.check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_logs, + ).dump() if state == "failure": sys.exit(1) diff --git a/tests/ci/digest_helper.py b/tests/ci/digest_helper.py index c0cbae13a45..8d6ec127f6e 100644 --- a/tests/ci/digest_helper.py +++ b/tests/ci/digest_helper.py @@ -11,6 +11,8 @@ from sys import modules from docker_images_helper import get_images_info from ci_config import DigestConfig from git_helper import Runner +from env_helper import ROOT_DIR +from ci_utils import cd DOCKER_DIGEST_LEN = 12 JOB_DIGEST_LEN = 10 @@ -67,17 +69,18 @@ def digest_paths( The order is processed as given""" hash_object = hash_object or md5() paths_all: List[Path] = [] - for p in paths: - if isinstance(p, str) and "*" in p: - for path in Path(".").glob(p): - bisect.insort(paths_all, path.absolute()) # type: ignore[misc] - else: - bisect.insort(paths_all, Path(p).absolute()) # type: ignore[misc] - for path in paths_all: # type: ignore - if path.exists(): - digest_path(path, hash_object, exclude_files, exclude_dirs) - else: - raise AssertionError(f"Invalid path: {path}") + with cd(ROOT_DIR): + for p in paths: + if isinstance(p, str) and "*" in p: + for path in Path(".").glob(p): + bisect.insort(paths_all, path.absolute()) # type: ignore[misc] + else: + bisect.insort(paths_all, Path(p).absolute()) # type: ignore[misc] + for path in paths_all: # type: ignore + if path.exists(): + digest_path(path, hash_object, exclude_files, exclude_dirs) + else: + raise AssertionError(f"Invalid path: {path}") return hash_object @@ -86,15 +89,16 @@ def digest_script(path_str: str) -> HASH: path = Path(path_str) parent = path.parent md5_hash = md5() - try: - for script in modules.values(): - script_path = getattr(script, "__file__", "") - if parent.absolute().as_posix() in script_path: - logger.debug("Updating the hash with %s", script_path) - _digest_file(Path(script_path), md5_hash) - except RuntimeError: - logger.warning("The modules size has changed, retry calculating digest") - return digest_script(path_str) + with cd(ROOT_DIR): + try: + for script in modules.values(): + script_path = getattr(script, "__file__", "") + if parent.absolute().as_posix() in script_path: + logger.debug("Updating the hash with %s", script_path) + _digest_file(Path(script_path), md5_hash) + except RuntimeError: + logger.warning("The modules size has changed, retry calculating digest") + return digest_script(path_str) return md5_hash @@ -113,17 +117,18 @@ class DockerDigester: def get_image_digest(self, name: str) -> str: assert isinstance(name, str) - deps = [name] - digest = None - while deps: - dep_name = deps.pop(0) - digest = digest_path( - self.images_info[dep_name]["path"], - digest, - exclude_files=self.EXCLUDE_FILES, - ) - deps += self.images_info[dep_name]["deps"] - assert digest + with cd(ROOT_DIR): + deps = [name] + digest = None + while deps: + dep_name = deps.pop(0) + digest = digest_path( + self.images_info[dep_name]["path"], + digest, + exclude_files=self.EXCLUDE_FILES, + ) + deps += self.images_info[dep_name]["deps"] + assert digest return digest.hexdigest()[0:DOCKER_DIGEST_LEN] def get_all_digests(self) -> Dict: diff --git a/tests/ci/docker_images_check.py 
b/tests/ci/docker_images_check.py index ae6e81c4c9f..a25669d85d0 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -12,7 +12,7 @@ from github import Github from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import format_description, get_commit, post_commit_status -from env_helper import ROOT_DIR, RUNNER_TEMP, GITHUB_RUN_URL +from env_helper import RUNNER_TEMP, GITHUB_RUN_URL from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import TestResults, TestResult @@ -170,8 +170,6 @@ def parse_args() -> argparse.Namespace: def main(): - # to be always aligned with docker paths from image.json - os.chdir(ROOT_DIR) logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 97e7ca3b5c0..f87246be24b 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -21,7 +21,6 @@ from pr_info import PRInfo from report import TestResult from s3_helper import S3Helper from stopwatch import Stopwatch -from env_helper import ROOT_DIR from upload_result_helper import upload_results from docker_images_helper import docker_login, get_images_oredered_list @@ -126,8 +125,6 @@ def create_manifest( def main(): - # to be aligned with docker paths from image.json - os.chdir(ROOT_DIR) logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index b75808890bd..b9e5c13ec42 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -10,11 +10,7 @@ from pathlib import Path from os import path as p, makedirs from typing import Dict, List -from github import Github - from build_check import get_release_or_pr -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import format_description, get_commit, post_commit_status from docker_images_helper import DockerImageData, docker_login from env_helper import ( GITHUB_RUN_URL, @@ -23,15 +19,12 @@ from env_helper import ( S3_BUILDS_BUCKET, S3_DOWNLOAD, ) -from get_robot_token import get_best_robot_token from git_helper import Git from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import S3Helper +from report import JobReport, TestResults, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen from build_download_helper import read_build_urls -from upload_result_helper import upload_results from version_helper import ( ClickHouseVersion, get_tagged_versions, @@ -346,7 +339,6 @@ def main(): image = DockerImageData(args.image_path, args.image_repo, False) args.release_type = auto_release_type(args.version, args.release_type) tags = gen_tags(args.version, args.release_type) - NAME = f"Docker image {image.repo} building check" pr_info = None repo_urls = dict() direct_urls: Dict[str, List[str]] = dict() @@ -384,7 +376,6 @@ def main(): if args.push: docker_login() - NAME = f"Docker image {image.repo} build and push" logging.info("Following tags will be created: %s", ", ".join(tags)) status = "success" @@ -398,38 +389,18 @@ def main(): ) if test_results[-1].status != "OK": status = "failure" - pr_info = pr_info or PRInfo() - s3_helper = S3Helper() - - url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) - - print(f"::notice ::Report url: {url}") - - if not args.reports: - return description = f"Processed 
tags: {', '.join(tags)}" + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[], + ).dump() - description = format_description(description) - - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - post_commit_status( - commit, status, url, description, NAME, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - url, - NAME, - ) - ch_helper = ClickHouseHelper() - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) if status != "success": sys.exit(1) diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index 1424ab8895d..a982cbc2a32 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -1,29 +1,16 @@ #!/usr/bin/env python3 import argparse -import atexit import logging import subprocess import sys from pathlib import Path -from github import Github - -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - update_mergeable_check, -) from docker_images_helper import get_docker_image, pull_image from env_helper import TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import S3Helper +from report import JobReport, TestResults, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results NAME = "Docs Check" @@ -60,26 +47,16 @@ def main(): pr_info = PRInfo(need_changed_files=True) - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, NAME) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - atexit.register(update_mergeable_check, commit, pr_info, NAME) - if not pr_info.has_changes_in_documentation() and not args.force: logging.info("No changes in documentation") - post_commit_status( - commit, - "success", - "", - "No changes in docs", - NAME, - pr_info, - dump_to_file=True, - ) + JobReport( + description="No changes in docs", + test_results=[], + status="success", + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[], + ).dump() sys.exit(0) if pr_info.has_changes_in_documentation(): @@ -134,28 +111,15 @@ def main(): else: test_results.append(TestResult("Non zero exit code", "FAIL")) - s3_helper = S3Helper() - ch_helper = ClickHouseHelper() + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_files, + ).dump() - report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME - ) - print("::notice ::Report url: {report_url}") - post_commit_status( - commit, status, report_url, description, NAME, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - NAME, - ) - - ch_helper.insert_events_into(db="default", table="checks", 
events=prepared_events) if status == "failure": sys.exit(1) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 093537fdeb0..c8ddcf25057 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -5,34 +5,15 @@ import subprocess import os import csv import sys -import atexit from pathlib import Path from typing import Tuple -from github import Github - -from build_check import get_release_or_pr -from clickhouse_helper import ( - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - update_mergeable_check, - format_description, -) from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import S3_BUILDS_BUCKET, TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo -from report import TestResult, TestResults, read_test_results -from s3_helper import S3Helper +from report import JobReport, TestResult, TestResults, read_test_results from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results -from version_helper import get_version_from_repo NAME = "Fast test" @@ -121,23 +102,8 @@ def main(): pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - atexit.register(update_mergeable_check, commit, pr_info, NAME) - - rerun_helper = RerunHelper(commit, NAME) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - status = rerun_helper.get_finished_status() - if status is not None and status.state != "success": - sys.exit(1) - sys.exit(0) - docker_image = pull_image(get_docker_image("clickhouse/fasttest")) - s3_helper = S3Helper() - workspace = temp_path / "fasttest-workspace" workspace.mkdir(parents=True, exist_ok=True) @@ -204,47 +170,17 @@ def main(): if timeout_expired: test_results.append(TestResult.create_check_timeout_expired(args.timeout)) state = "failure" - description = format_description(test_results[-1].name) + description = test_results[-1].name - ch_helper = ClickHouseHelper() - s3_path_prefix = "/".join( - ( - get_release_or_pr(pr_info, get_version_from_repo())[0], - pr_info.sha, - "fast_tests", - ) - ) - build_urls = s3_helper.upload_build_directory_to_s3( - output_path / "binaries", - s3_path_prefix, - keep_dirs_in_s3_path=False, - upload_symlinks=False, - ) - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - additional_logs, - NAME, - build_urls, - ) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, state, report_url, description, NAME, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - NAME, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_logs, + build_dir_for_upload=str(output_path / "binaries"), + ).dump() # Refuse other checks to run if fast test failed if state != "success": diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 89fcb9ce350..b7e6c656b1f 100644 --- 
a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -20,7 +20,6 @@ from clickhouse_helper import ( prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( - RerunHelper, get_commit, override_status, post_commit_status, @@ -247,13 +246,14 @@ def main(): flaky_check = "flaky" in check_name.lower() run_changed_tests = flaky_check or validate_bugfix_check - gh = Github(get_best_robot_token(), per_page=100) # For validate_bugfix_check we need up to date information about labels, so pr_event_from_api is used pr_info = PRInfo( need_changed_files=run_changed_tests, pr_event_from_api=validate_bugfix_check ) + # FIXME: move to job report and remove + gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) atexit.register(update_mergeable_check, commit, pr_info, check_name) @@ -279,11 +279,6 @@ def main(): run_by_hash_total = 0 check_name_with_group = check_name - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - tests_to_run = [] if run_changed_tests: tests_to_run = get_tests_to_run(pr_info) diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 5ef65f3f38b..4fc112c6d9f 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -2,7 +2,6 @@ import argparse -import atexit import logging import sys import subprocess @@ -10,30 +9,15 @@ from pathlib import Path from shutil import copy2 from typing import Dict -from github import Github from build_download_helper import download_builds_filter -from clickhouse_helper import ( - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - format_description, - get_commit, - post_commit_status, - update_mergeable_check, -) + from compress_files import compress_fast from docker_images_helper import DockerImage, pull_image, get_docker_image -from env_helper import CI, REPORT_PATH, TEMP_PATH as TEMP -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import TestResults, TestResult, FAILURE, FAIL, OK, SUCCESS -from s3_helper import S3Helper +from env_helper import REPORT_PATH, TEMP_PATH as TEMP +from report import JobReport, TestResults, TestResult, FAILURE, FAIL, OK, SUCCESS from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results RPM_IMAGE = "clickhouse/install-rpm-test" @@ -274,20 +258,6 @@ def main(): TEMP_PATH.mkdir(parents=True, exist_ok=True) LOGS_PATH.mkdir(parents=True, exist_ok=True) - pr_info = PRInfo() - - if CI: - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, commit, pr_info, args.check_name) - - rerun_helper = RerunHelper(commit, args.check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info( - "Check is already finished according to github status, exiting" - ) - sys.exit(0) - deb_image = pull_image(get_docker_image(DEB_IMAGE)) rpm_image = pull_image(get_docker_image(RPM_IMAGE)) @@ -331,54 +301,21 @@ def main(): test_results.extend(test_install_tgz(rpm_image)) state = SUCCESS - test_status = OK description = "Packages installed successfully" if FAIL in (result.status for result in test_results): state = FAILURE - test_status = FAIL description = "Failed to install packages: " + ", ".join( result.name for result in test_results ) - s3_helper 
= S3Helper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - [], - args.check_name, - ) - print(f"::notice ::Report url: {report_url}") - if not CI: - return - - ch_helper = ClickHouseHelper() - - description = format_description(description) - - post_commit_status( - commit, - state, - report_url, - description, - args.check_name, - pr_info, - dump_to_file=True, - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - test_status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - args.check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[], + ).dump() if state == FAILURE: sys.exit(1) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index c65f162f770..18b3d2c2898 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -13,7 +13,6 @@ from typing import Dict, List, Tuple from build_download_helper import download_all_deb_packages from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import ( - RerunHelper, get_commit, override_status, post_commit_status, @@ -189,14 +188,10 @@ def main(): logging.info("Skipping '%s' (no pr-bugfix in '%s')", check_name, pr_info.labels) sys.exit(0) + # FIXME: switch to JobReport and remove: gh = GitHub(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - images = [pull_image(get_docker_image(i)) for i in IMAGES] result_path = temp_path / "output_dir" result_path.mkdir(parents=True, exist_ok=True) diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 27c4b041622..93e33d62293 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -10,26 +10,23 @@ from pathlib import Path from typing import Any, List import boto3 # type: ignore -from github import Github import requests # type: ignore -from git_helper import git_runner + from build_download_helper import ( download_build_with_progress, get_build_name_for_check, read_build_urls, ) -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import RerunHelper, get_commit, post_commit_status from compress_files import compress_fast -from env_helper import REPO_COPY, REPORT_PATH, S3_BUILDS_BUCKET, S3_URL, TEMP_PATH -from get_robot_token import get_best_robot_token, get_parameter_from_ssm +from env_helper import REPO_COPY, REPORT_PATH, S3_URL, TEMP_PATH, S3_BUILDS_BUCKET +from get_robot_token import get_parameter_from_ssm +from git_helper import git_runner from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import S3Helper +from report import JobReport, TestResults, TestResult from ssh import SSHKey from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results + JEPSEN_GROUP_NAME = "jepsen_group" @@ -186,16 +183,8 @@ def main(): logging.info("Not jepsen test label in labels list, skipping") sys.exit(0) - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, 
pr_info.sha) - check_name = KEEPER_CHECK_NAME if args.program == "keeper" else SERVER_CHECK_NAME - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - if not os.path.exists(TEMP_PATH): os.makedirs(TEMP_PATH) @@ -292,32 +281,16 @@ def main(): description = "No Jepsen output log" test_result = [TestResult("No Jepsen output log", "FAIL")] - s3_helper = S3Helper() - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_result, - [run_log_path] + additional_data, - check_name, - ) + JobReport( + description=description, + test_results=test_result, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[run_log_path] + additional_data, + check_name=check_name, + ).dump() - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) - - ch_helper = ClickHouseHelper() - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_result, - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) clear_autoscaling_group() diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 49699b7d2fd..5f41afe9fb6 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -4,28 +4,18 @@ import argparse import logging import os import sys -import atexit import zipfile from pathlib import Path from typing import List -from github import Github - from build_download_helper import download_fuzzers from clickhouse_helper import ( CiLogsCredentials, ) -from commit_status_helper import ( - RerunHelper, - get_commit, - update_mergeable_check, -) from docker_images_helper import DockerImage, pull_image, get_docker_image from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResults from stopwatch import Stopwatch @@ -116,28 +106,16 @@ def main(): check_name = args.check_name kill_timeout = args.kill_timeout - gh = Github(get_best_robot_token(), per_page=100) pr_info = PRInfo() - commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, commit, pr_info, check_name) temp_path.mkdir(parents=True, exist_ok=True) if "RUN_BY_HASH_NUM" in os.environ: run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0")) - check_name_with_group = ( - check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" - ) else: run_by_hash_num = 0 run_by_hash_total = 0 - check_name_with_group = check_name - - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) docker_image = pull_image(get_docker_image("clickhouse/libfuzzer")) diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 75f40ae7feb..524da916a5e 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -8,11 +8,10 @@ import subprocess import traceback import re from pathlib import Path -from typing import Dict from github import Github -from 
commit_status_helper import RerunHelper, get_commit, post_commit_status +from commit_status_helper import get_commit from ci_config import CI_CONFIG from docker_images_helper import pull_image, get_docker_image from env_helper import ( @@ -26,11 +25,11 @@ from env_helper import ( ) from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo -from s3_helper import S3Helper from tee_popen import TeePopen from clickhouse_helper import get_instance_type, get_instance_id from stopwatch import Stopwatch from build_download_helper import download_builds_filter +from report import JobReport IMAGE_NAME = "clickhouse/performance-comparison" @@ -123,23 +122,7 @@ def main(): is_aarch64 = "aarch64" in os.getenv("CHECK_NAME", "Performance Comparison").lower() if pr_info.number != 0 and is_aarch64 and "pr-performance" not in pr_info.labels: - status = "success" - message = "Skipped, not labeled with 'pr-performance'" - report_url = GITHUB_RUN_URL - post_commit_status( - commit, - status, - report_url, - message, - check_name_with_group, - pr_info, - dump_to_file=True, - ) - sys.exit(0) - - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") + print("Skipped, not labeled with 'pr-performance'") sys.exit(0) check_name_prefix = ( @@ -202,6 +185,13 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + def too_many_slow(msg): + match = re.search(r"(|.* )(\d+) slower.*", msg) + # This threshold should be synchronized with the value in + # https://github.com/ClickHouse/ClickHouse/blob/master/docker/test/performance-comparison/report.py#L629 + threshold = 5 + return int(match.group(2).strip()) > threshold if match else False + paths = { "compare.log": compare_log_path, "output.7z": result_path / "output.7z", @@ -212,32 +202,12 @@ def main(): "run.log": run_log_path, } - s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/" - s3_helper = S3Helper() - uploaded = {} # type: Dict[str, str] - for name, path in paths.items(): - try: - uploaded[name] = s3_helper.upload_test_report_to_s3( - Path(path), s3_prefix + name - ) - except Exception: - uploaded[name] = "" - traceback.print_exc() - - # Upload all images and flamegraphs to S3 - try: - s3_helper.upload_test_directory_to_s3( - Path(result_path) / "images", s3_prefix + "images" - ) - except Exception: - traceback.print_exc() - - def too_many_slow(msg): - match = re.search(r"(|.* )(\d+) slower.*", msg) - # This threshold should be synchronized with the value in - # https://github.com/ClickHouse/ClickHouse/blob/master/docker/test/performance-comparison/report.py#L629 - threshold = 5 - return int(match.group(2).strip()) > threshold if match else False + # FIXME: where images come from? dir does not exist atm. + image_files = ( + list((Path(result_path) / "images").iterdir()) + if (Path(result_path) / "images").exists() + else [] + ) # Try to fetch status from the report. status = "" @@ -269,24 +239,15 @@ def main(): status = "failure" message = "No message in report." 
- report_url = GITHUB_RUN_URL - - report_url = ( - uploaded["report.html"] - or uploaded["output.7z"] - or uploaded["compare.log"] - or uploaded["run.log"] - ) - - post_commit_status( - commit, - status, - report_url, - message, - check_name_with_group, - pr_info, - dump_to_file=True, - ) + JobReport( + description=message, + test_results=[], + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[v for _, v in paths.items()] + image_files, + check_name=check_name_with_group, + ).dump() if status == "error": sys.exit(1) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 1359ac5e63e..d1867ebf786 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -69,11 +69,13 @@ def get_pr_for_commit(sha, ref): if pr["head"]["ref"] in ref: return pr our_prs.append(pr) - print("Cannot find PR with required ref", ref, "returning first one") + print( + f"Cannot find PR with required ref {ref}, sha {sha} - returning first one" + ) first_pr = our_prs[0] return first_pr except Exception as ex: - print("Cannot fetch PR info from commit", ex) + print(f"Cannot fetch PR info from commit {ref}, {sha}", ex) return None @@ -279,6 +281,9 @@ class PRInfo: if need_changed_files: self.fetch_changed_files() + def is_master(self) -> bool: + return self.number == 0 and self.base_ref == "master" + def is_scheduled(self): return self.event_type == EventType.SCHEDULE diff --git a/tests/ci/report.py b/tests/ci/report.py index 6273eed2bc7..b478f737963 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -1,8 +1,18 @@ # -*- coding: utf-8 -*- from ast import literal_eval -from dataclasses import dataclass +from dataclasses import asdict, dataclass from pathlib import Path -from typing import Dict, Final, Iterable, List, Literal, Optional, Tuple +from typing import ( + Dict, + Final, + Iterable, + List, + Literal, + Optional, + Sequence, + Tuple, + Union, +) from html import escape import csv import datetime @@ -12,6 +22,7 @@ import os from build_download_helper import get_gh_api from ci_config import BuildConfig, CI_CONFIG +from env_helper import REPORT_PATH, TEMP_PATH logger = logging.getLogger(__name__) @@ -221,6 +232,7 @@ HTML_TEST_PART = """ """ BASE_HEADERS = ["Test name", "Test status"] +JOB_REPORT_FILE = Path(TEMP_PATH) / "job_report.json" @dataclass @@ -229,10 +241,10 @@ class TestResult: status: str # the following fields are optional time: Optional[float] = None - log_files: Optional[List[Path]] = None + log_files: Optional[Union[Sequence[str], Sequence[Path]]] = None raw_logs: Optional[str] = None # the field for uploaded logs URLs - log_urls: Optional[List[str]] = None + log_urls: Optional[Sequence[str]] = None def set_raw_logs(self, raw_logs: str) -> None: self.raw_logs = raw_logs @@ -245,9 +257,8 @@ class TestResult: f"Malformed input: must be a list literal: {log_files_literal}" ) for log_path in log_paths: - file = Path(log_path) - assert file.exists(), file - self.log_files.append(file) + assert Path(log_path).exists(), log_path + self.log_files.append(log_path) @staticmethod def create_check_timeout_expired(timeout: float) -> "TestResult": @@ -257,6 +268,53 @@ class TestResult: TestResults = List[TestResult] +@dataclass +class JobReport: + status: str + description: str + test_results: TestResults + start_time: str + duration: float + additional_files: Union[Sequence[str], Sequence[Path]] + # clickhouse version, build job only + version: str = "" + # check name to set in commit status, set if differs from job name + check_name:
str = "" + # directory with artifacts to upload on s3 + build_dir_for_upload: Union[Path, str] = "" + # if False no GH commit status will be created by CI + need_commit_status: bool = True + + @classmethod + def exist(cls) -> bool: + return JOB_REPORT_FILE.is_file() + + @classmethod + def load(cls): # type: ignore + res = {} + with open(JOB_REPORT_FILE, "r") as json_file: + res = json.load(json_file) + # Deserialize the nested lists of TestResult + test_results_data = res.get("test_results", []) + test_results = [TestResult(**result) for result in test_results_data] + del res["test_results"] + return JobReport(test_results=test_results, **res) + + @classmethod + def cleanup(cls): + if JOB_REPORT_FILE.exists(): + JOB_REPORT_FILE.unlink() + + def dump(self): + def path_converter(obj): + if isinstance(obj, Path): + return str(obj) + raise TypeError("Type not serializable") + + with open(JOB_REPORT_FILE, "w") as json_file: + json.dump(asdict(self), json_file, default=path_converter, indent=2) + + def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults: results = [] # type: TestResults with open(results_path, "r", encoding="utf-8") as descriptor: @@ -296,14 +354,72 @@ class BuildResult: log_url: str build_urls: List[str] version: str - status: StatusType + status: str elapsed_seconds: int job_api_url: str + pr_number: int = 0 + head_ref: str = "dummy_branch_name" _job_name: Optional[str] = None _job_html_url: Optional[str] = None _job_html_link: Optional[str] = None _grouped_urls: Optional[List[List[str]]] = None + @classmethod + def cleanup(cls): + if Path(REPORT_PATH).exists(): + for file in Path(REPORT_PATH).iterdir(): + if "build_report" in file.name and file.name.endswith(".json"): + file.unlink() + + @classmethod + def load(cls, build_name: str, pr_number: int, head_ref: str): # type: ignore + """ + loads report from a report file matched with given @pr_number and/or a @head_ref + """ + report_path = Path(REPORT_PATH) / BuildResult.get_report_name( + build_name, pr_number or head_ref + ) + return cls.load_from_file(report_path) + + @classmethod + def load_any(cls, build_name: str, pr_number: int, head_ref: str): # type: ignore + """ + loads report from suitable report file with the following priority: + 1. report from PR with the same @pr_number + 2. report from branch with the same @head_ref + 3. report from the master + 4. 
any other report + """ + reports = [] + for file in Path(REPORT_PATH).iterdir(): + if f"{build_name}.json" in file.name: + reports.append(file) + if not reports: + return None + file_path = None + for file in reports: + if pr_number and f"_{pr_number}_" in file.name: + file_path = file + break + if f"_{head_ref}_" in file.name: + file_path = file + break + if "_master_" in file.name: + file_path = file + break + return cls.load_from_file(file_path or reports[-1]) + + @classmethod + def load_from_file(cls, file: Union[Path, str]): # type: ignore + if not Path(file).exists(): + return None + with open(file, "r") as json_file: + res = json.load(json_file) + return BuildResult(**res) + + def as_json(self) -> str: + return json.dumps(asdict(self), indent=2) + @property def build_config(self) -> Optional[BuildConfig]: return CI_CONFIG.build_config.get(self.build_name, None) @@ -373,10 +489,6 @@ class BuildResult: def _wrong_config_message(self) -> str: return "missing" - @property - def file_name(self) -> Path: - return self.get_report_name(self.build_name) - @property def is_missing(self) -> bool: "The report is created for missing json file" @@ -427,37 +539,18 @@ class BuildResult: self._job_html_url = job_data.get("html_url", "") @staticmethod - def get_report_name(name: str) -> Path: - return Path(f"build_report_{name}.json") - - @staticmethod - def read_json(directory: Path, build_name: str) -> "BuildResult": - path = directory / BuildResult.get_report_name(build_name) - try: - with open(path, "r", encoding="utf-8") as pf: - data = json.load(pf) # type: dict - except FileNotFoundError: - logger.warning( - "File %s for build named '%s' is not found", path, build_name - ) - return BuildResult.missing_result(build_name) - - return BuildResult( - data.get("build_name", build_name), - data.get("log_url", ""), - data.get("build_urls", []), - data.get("version", ""), - data.get("status", ERROR), - data.get("elapsed_seconds", 0), - data.get("job_api_url", ""), - ) + def get_report_name(name: str, suffix: Union[str, int]) -> Path: + assert "/" not in str(suffix) + return Path(f"build_report_{suffix}_{name}.json") @staticmethod def missing_result(build_name: str) -> "BuildResult": return BuildResult(build_name, "", [], "missing", ERROR, 0, "missing") - def write_json(self, directory: Path) -> Path: - path = directory / self.file_name + def write_json(self, directory: Union[Path, str] = REPORT_PATH) -> Path: + path = Path(directory) / self.get_report_name( + self.build_name, self.pr_number or self.head_ref + ) path.write_text( json.dumps( { @@ -468,6 +561,8 @@ class BuildResult: "status": self.status, "elapsed_seconds": self.elapsed_seconds, "job_api_url": self.job_api_url, + "pr_number": self.pr_number, + "head_ref": self.head_ref, } ), encoding="utf-8", @@ -532,10 +627,17 @@ def _get_status_style(status: str, colortheme: Optional[ColorTheme] = None) -> s def _get_html_url_name(url): + base_name = "" if isinstance(url, str): - return os.path.basename(url).replace("%2B", "+").replace("%20", " ") + base_name = os.path.basename(url) if isinstance(url, tuple): - return url[1].replace("%2B", "+").replace("%20", " ") + base_name = url[1] + + if "?" 
in base_name: + base_name = base_name.split("?")[0] + + if base_name is not None: + return base_name.replace("%2B", "+").replace("%20", " ") return None diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 35641ba0455..f85ab2be9a3 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -6,29 +6,15 @@ import subprocess import sys from pathlib import Path -from github import Github - from build_download_helper import get_build_name_for_check, read_build_urls -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import ( - RerunHelper, - format_description, - get_commit, - post_commit_status, -) from docker_images_helper import DockerImage, pull_image, get_docker_image from env_helper import ( - GITHUB_RUN_URL, REPORT_PATH, TEMP_PATH, ) -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import S3Helper +from report import JobReport, TestResults, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/sqlancer-test" @@ -58,16 +44,6 @@ def main(): check_name ), "Check name must be provided as an input arg or in CHECK_NAME env" - pr_info = PRInfo() - - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) build_name = get_build_name_for_check(check_name) @@ -118,9 +94,6 @@ def main(): paths += [workspace_path / f"{t}.err" for t in tests] paths += [workspace_path / f"{t}.out" for t in tests] - s3_helper = S3Helper() - report_url = GITHUB_RUN_URL - status = "success" test_results = [] # type: TestResults # Try to get status message saved by the SQLancer @@ -139,33 +112,17 @@ def main(): status = "failure" description = "Task failed: $?=" + str(retcode) - description = format_description(description) + if not test_results: + test_results = [TestResult(name=__file__, status=status)] - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - paths, - check_name, - ) - - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) - print(f"::notice:: {check_name} Report url: {report_url}") - - ch_helper = ClickHouseHelper() - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=paths, + ).dump() if __name__ == "__main__": diff --git a/tests/ci/sqllogic_test.py b/tests/ci/sqllogic_test.py index 96b4de4517f..a7b3e3cf69e 100755 --- a/tests/ci/sqllogic_test.py +++ b/tests/ci/sqllogic_test.py @@ -5,28 +5,25 @@ import csv import logging import os import subprocess -import sys from pathlib import Path -from typing import List, Tuple - -from github import Github +from typing import Tuple from build_download_helper import download_all_deb_packages -from commit_status_helper import 
( - RerunHelper, - get_commit, - override_status, - post_commit_status, -) +from commit_status_helper import override_status from docker_images_helper import DockerImage, pull_image, get_docker_image from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import OK, FAIL, ERROR, SUCCESS, TestResults, TestResult, read_test_results -from s3_helper import S3Helper +from report import ( + OK, + FAIL, + ERROR, + SUCCESS, + JobReport, + TestResults, + TestResult, + read_test_results, +) from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results NO_CHANGES_MSG = "Nothing to run" @@ -104,15 +101,6 @@ def main(): kill_timeout > 0 ), "kill timeout must be provided as an input arg or in KILL_TIMEOUT env" - pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) repo_tests_path = repo_path / "tests" @@ -150,8 +138,6 @@ def main(): logging.info("Files in result folder %s", os.listdir(result_path)) - s3_helper = S3Helper() - status = None description = None @@ -186,29 +172,19 @@ def main(): ) ) - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - additional_logs, - check_name, - ) - - print( - f"::notice:: {check_name}" - f", Result: '{status}'" - f", Description: '{description}'" - f", Report url: '{report_url}'" - ) - # Until it pass all tests, do not block CI, report "success" assert description is not None # FIXME: force SUCCESS until all cases are fixed status = SUCCESS - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) + + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_logs, + ).dump() if __name__ == "__main__": diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py index edb64d9f106..b2105d4f5c0 100644 --- a/tests/ci/sqltest.py +++ b/tests/ci/sqltest.py @@ -7,25 +7,15 @@ import sys from pathlib import Path from typing import Dict -from github import Github from build_download_helper import get_build_name_for_check, read_build_urls -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, -) from docker_images_helper import pull_image, get_docker_image from env_helper import ( - GITHUB_RUN_URL, REPORT_PATH, TEMP_PATH, ) -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResult -from s3_helper import S3Helper +from report import JobReport, TestResult from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/sqltest" @@ -62,14 +52,6 @@ def main(): pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) build_name = get_build_name_for_check(check_name) 
@@ -109,10 +91,6 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) - check_name_lower = ( - check_name.lower().replace("(", "").replace(")", "").replace(" ", "") - ) - s3_prefix = f"{pr_info.number}/{pr_info.sha}/sqltest_{check_name_lower}/" paths = { "run.log": run_log_path, "server.log.zst": workspace_path / "server.log.zst", @@ -120,43 +98,18 @@ def main(): "report.html": workspace_path / "report.html", "test.log": workspace_path / "test.log", } - path_urls = {} # type: Dict[str, str] - - s3_helper = S3Helper() - for f in paths: - try: - path_urls[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + f) - except Exception as ex: - logging.info("Exception uploading file %s text %s", f, ex) - path_urls[f] = "" - - report_url = GITHUB_RUN_URL - if path_urls["report.html"]: - report_url = path_urls["report.html"] - status = "success" description = "See the report" - test_result = TestResult(description, "OK") + test_results = [TestResult(description, "OK")] - ch_helper = ClickHouseHelper() - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - [test_result], - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - - logging.info("Result: '%s', '%s', '%s'", status, description, report_url) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[v for _, v in paths.items()], + ).dump() if __name__ == "__main__": diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 05250c14fd1..46bb2261aba 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -8,29 +8,15 @@ import sys from pathlib import Path from typing import List, Tuple -from github import Github - from build_download_helper import download_all_deb_packages -from clickhouse_helper import ( - CiLogsCredentials, - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - format_description, -) +from clickhouse_helper import CiLogsCredentials + from docker_images_helper import DockerImage, pull_image, get_docker_image from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResult, TestResults, read_test_results -from s3_helper import S3Helper +from report import JobReport, TestResult, TestResults, read_test_results from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results def get_additional_envs() -> List[str]: @@ -139,14 +125,6 @@ def run_stress_test(docker_image_name: str) -> None: pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(docker_image_name)) packages_path = temp_path / "packages" @@ -194,7 +172,6 @@ def run_stress_test(docker_image_name: str) -> None: subprocess.check_call(f"sudo 
chown -R ubuntu:ubuntu {temp_path}", shell=True) ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path) - s3_helper = S3Helper() state, description, test_results, additional_logs = process_results( result_path, server_log_path, run_log_path ) @@ -202,34 +179,16 @@ def run_stress_test(docker_image_name: str) -> None: if timeout_expired: test_results.append(TestResult.create_check_timeout_expired(timeout)) state = "failure" - description = format_description(test_results[-1].name) + description = test_results[-1].name - ch_helper = ClickHouseHelper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - additional_logs, - check_name, - ) - print(f"::notice ::Report url: {report_url}") - - post_commit_status( - commit, state, report_url, description, check_name, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_logs, + ).dump() if state == "failure": sys.exit(1) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index b37dcb59237..4f791a5ee01 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 import argparse -import atexit import csv import logging import os @@ -9,24 +8,14 @@ import sys from pathlib import Path from typing import List, Tuple -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - update_mergeable_check, -) + from docker_images_helper import get_docker_image, pull_image from env_helper import REPO_COPY, TEMP_PATH -from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, git_runner -from github_helper import GitHub from pr_info import PRInfo -from report import TestResults, read_test_results -from s3_helper import S3Helper +from report import JobReport, TestResults, read_test_results from ssh import SSHKey from stopwatch import Stopwatch -from upload_result_helper import upload_results NAME = "Style Check" @@ -142,21 +131,6 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) pr_info = PRInfo() - gh = GitHub(get_best_robot_token(), create_cache_dir=False) - commit = get_commit(gh, pr_info.sha) - - atexit.register(update_mergeable_check, commit, pr_info, NAME) - - rerun_helper = RerunHelper(commit, NAME) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - # Finish with the same code as previous - state = rerun_helper.get_finished_status().state # type: ignore - # state == "success" -> code = 0 - code = int(state != "success") - sys.exit(code) - - s3_helper = S3Helper() IMAGE_NAME = "clickhouse/style-test" image = pull_image(get_docker_image(IMAGE_NAME)) @@ -180,28 +154,18 @@ def main(): checkout_last_ref(pr_info) state, description, test_results, additional_files = process_result(temp_path) - ch_helper = ClickHouseHelper() - report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME - ) - print(f"::notice ::Report url: {report_url}") - 
post_commit_status( - commit, state, report_url, description, NAME, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - NAME, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_files, + ).dump() if state in ["error", "failure"]: + print(f"Style check failed: [{description}]") sys.exit(1) diff --git a/tests/ci/test_digest.py b/tests/ci/test_digest.py index 86ba16ec525..44117bb604b 100644 --- a/tests/ci/test_digest.py +++ b/tests/ci/test_digest.py @@ -5,6 +5,7 @@ from hashlib import md5 from pathlib import Path import digest_helper as dh +from env_helper import ROOT_DIR _12 = b"12\n" _13 = b"13\n" @@ -13,7 +14,7 @@ _14 = b"14\n" # pylint:disable=protected-access class TestDigests(unittest.TestCase): - tests_dir = Path("tests/digests") + tests_dir = Path(ROOT_DIR) / "tests/ci/tests/digests" broken_link = tests_dir / "broken-symlink" empty_digest = "d41d8cd98f00b204e9800998ecf8427e" diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index f1238a00bd4..495547e1dfc 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -5,33 +5,15 @@ import logging import os import sys import subprocess -import atexit from pathlib import Path -from typing import List, Tuple - -from github import Github +from typing import Tuple from build_download_helper import download_unit_tests -from clickhouse_helper import ( - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - update_mergeable_check, -) from docker_images_helper import pull_image, get_docker_image from env_helper import REPORT_PATH, TEMP_PATH -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import ERROR, FAILURE, FAIL, OK, SUCCESS, TestResults, TestResult -from s3_helper import S3Helper +from report import ERROR, FAILURE, FAIL, OK, SUCCESS, JobReport, TestResults, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results - IMAGE_NAME = "clickhouse/unit-test" @@ -182,18 +164,6 @@ def main(): temp_path = Path(TEMP_PATH) temp_path.mkdir(parents=True, exist_ok=True) - pr_info = PRInfo() - - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - atexit.register(update_mergeable_check, commit, pr_info, check_name) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) download_unit_tests(check_name, REPORT_PATH, TEMP_PATH) @@ -222,35 +192,18 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {TEMP_PATH}", shell=True) - s3_helper = S3Helper() state, description, test_results = process_results(test_output) - - ch_helper = ClickHouseHelper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - [run_log_path] + [p for p in test_output.iterdir() if not p.is_dir()], - check_name, - ) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - 
commit, state, report_url, description, check_name, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + additional_files = [run_log_path] + [ + p for p in test_output.iterdir() if not p.is_dir() + ] + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_files, + ).dump() if state == "failure": sys.exit(1) diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index bf52e6069dd..6fa9c1dd873 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Sequence, Union import os import logging @@ -15,13 +15,13 @@ from s3_helper import S3Helper def process_logs( s3_client: S3Helper, - additional_logs: List[Path], + additional_logs: Union[Sequence[str], Sequence[Path]], s3_path_prefix: str, test_results: TestResults, ) -> List[str]: logging.info("Upload files to s3 %s", additional_logs) - processed_logs = {} # type: Dict[Path, str] + processed_logs = {} # type: Dict[str, str] # Firstly convert paths of logs from test_results to urls to s3. for test_result in test_results: if test_result.log_files is None: @@ -31,22 +31,24 @@ def process_logs( test_result.log_urls = [] for path in test_result.log_files: if path in processed_logs: - test_result.log_urls.append(processed_logs[path]) + test_result.log_urls.append(processed_logs[str(path)]) elif path: url = s3_client.upload_test_report_to_s3( - path, s3_path_prefix + "/" + path.name + Path(path), s3_path_prefix + "/" + str(path) ) test_result.log_urls.append(url) - processed_logs[path] = url + processed_logs[str(path)] = url additional_urls = [] for log_path in additional_logs: - if log_path.is_file(): + if Path(log_path).is_file(): additional_urls.append( s3_client.upload_test_report_to_s3( - log_path, s3_path_prefix + "/" + os.path.basename(log_path) + Path(log_path), s3_path_prefix + "/" + os.path.basename(log_path) ) ) + else: + logging.error("File %s is missing - skip", log_path) return additional_urls @@ -56,7 +58,7 @@ def upload_results( pr_number: int, commit_sha: str, test_results: TestResults, - additional_files: List[Path], + additional_files: Union[Sequence[Path], Sequence[str]], check_name: str, additional_urls: Optional[List[str]] = None, ) -> str: @@ -65,12 +67,11 @@ def upload_results( normalized_check_name = normalized_check_name.replace(*r) # Preserve additional_urls to not modify the original one - original_additional_urls = additional_urls or [] + additional_urls = additional_urls or [] s3_path_prefix = f"{pr_number}/{commit_sha}/{normalized_check_name}" - additional_urls = process_logs( - s3_client, additional_files, s3_path_prefix, test_results + additional_urls.extend( + process_logs(s3_client, additional_files, s3_path_prefix, test_results) ) - additional_urls.extend(original_additional_urls) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" branch_name = "master" @@ -79,6 +80,13 @@ def upload_results( branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{commit_sha}" + 
ready_report_url = None + for url in additional_urls: + if "report.html" in url: + ready_report_url = url + additional_urls.remove(ready_report_url) + break + if additional_urls: raw_log_url = additional_urls.pop(0) else: @@ -88,21 +96,25 @@ def upload_results( ReportColorTheme.bugfixcheck if "bugfix validate check" in check_name else None ) - html_report = create_test_html_report( - check_name, - test_results, - raw_log_url, - GITHUB_RUN_URL, - GITHUB_JOB_URL(), - branch_url, - branch_name, - commit_url, - additional_urls, - statuscolors=statuscolors, - ) - report_path = Path("report.html") - report_path.write_text(html_report, encoding="utf-8") + if test_results or not ready_report_url: + html_report = create_test_html_report( + check_name, + test_results, + raw_log_url, + GITHUB_RUN_URL, + GITHUB_JOB_URL(), + branch_url, + branch_name, + commit_url, + additional_urls, + statuscolors=statuscolors, + ) + report_path = Path("report.html") + report_path.write_text(html_report, encoding="utf-8") + url = s3_client.upload_test_report_to_s3(report_path, s3_path_prefix + ".html") + else: + logging.info("report.html was prepared by test job itself") + url = ready_report_url - url = s3_client.upload_test_report_to_s3(report_path, s3_path_prefix + ".html") logging.info("Search result in url %s", url) return url diff --git a/tests/config/config.d/clusters.xml b/tests/config/config.d/clusters.xml index cfd4868f1dc..7ade716902c 100644 --- a/tests/config/config.d/clusters.xml +++ b/tests/config/config.d/clusters.xml @@ -144,6 +144,24 @@ + + + false + + 127.0.0.1 + 9000 + + + 127.0.0.2 + 9000 + + + + 127.0.0.3 + 1234 + + + false diff --git a/tests/config/config.d/filesystem_caches_path.xml b/tests/config/config.d/filesystem_caches_path.xml index ca946db2e0a..dd6933e9524 100644 --- a/tests/config/config.d/filesystem_caches_path.xml +++ b/tests/config/config.d/filesystem_caches_path.xml @@ -1,3 +1,4 @@ /var/lib/clickhouse/filesystem_caches/ + /var/lib/clickhouse/filesystem_caches/ diff --git a/tests/integration/helpers/0_common_instance_config.xml b/tests/integration/helpers/0_common_instance_config.xml index 535cf252274..73792affee6 100644 --- a/tests/integration/helpers/0_common_instance_config.xml +++ b/tests/integration/helpers/0_common_instance_config.xml @@ -27,4 +27,6 @@ true 2 + + / diff --git a/tests/integration/test_backup_restore_new/configs/shutdown_cancel_backups.xml b/tests/integration/test_backup_restore_new/configs/shutdown_cancel_backups.xml new file mode 100644 index 00000000000..e0c0e0b32cd --- /dev/null +++ b/tests/integration/test_backup_restore_new/configs/shutdown_cancel_backups.xml @@ -0,0 +1,3 @@ + + false + diff --git a/tests/integration/test_backup_restore_new/configs/slow_backups.xml b/tests/integration/test_backup_restore_new/configs/slow_backups.xml new file mode 100644 index 00000000000..96860e65fdc --- /dev/null +++ b/tests/integration/test_backup_restore_new/configs/slow_backups.xml @@ -0,0 +1,7 @@ + + + true + + 2 + 2 + diff --git a/tests/integration/test_backup_restore_new/test_cancel_backup.py b/tests/integration/test_backup_restore_new/test_cancel_backup.py new file mode 100644 index 00000000000..06bcb5eadfc --- /dev/null +++ b/tests/integration/test_backup_restore_new/test_cancel_backup.py @@ -0,0 +1,232 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV, assert_eq_with_retry +import uuid +import re + + +cluster = ClickHouseCluster(__file__) + +main_configs = [ + "configs/backups_disk.xml", + 
"configs/slow_backups.xml", + "configs/shutdown_cancel_backups.xml", +] + +node = cluster.add_instance( + "node", + main_configs=main_configs, + external_dirs=["/backups/"], + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def drop_after_test(): + try: + yield + finally: + node.query("DROP TABLE IF EXISTS tbl SYNC") + + +# Generate the backup name. +def get_backup_name(backup_id): + return f"Disk('backups', '{backup_id}')" + + +# Start making a backup asynchronously. +def start_backup(backup_id): + node.query( + f"BACKUP TABLE tbl TO {get_backup_name(backup_id)} SETTINGS id='{backup_id}' ASYNC" + ) + + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{backup_id}'") + == "CREATING_BACKUP\n" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Backup' AND query LIKE '%{backup_id}%'" + ) + == "1\n" + ) + + +# Wait for the backup to be completed. +def wait_backup(backup_id): + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id='{backup_id}'", + "BACKUP_CREATED", + retry_count=60, + sleep_time=5, + ) + + backup_duration = int( + node.query( + f"SELECT end_time - start_time FROM system.backups WHERE id='{backup_id}'" + ) + ) + assert backup_duration >= 3 # Backup is not expected to be too quick in this test. + + +# Cancel the specified backup. +def cancel_backup(backup_id): + node.query( + f"KILL QUERY WHERE query_kind='Backup' AND query LIKE '%{backup_id}%' SYNC" + ) + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{backup_id}'") + == "BACKUP_CANCELLED\n" + ) + expected_error = "QUERY_WAS_CANCELLED" + assert expected_error in node.query( + f"SELECT error FROM system.backups WHERE id='{backup_id}'" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Backup' AND query LIKE '%{backup_id}%'" + ) + == "0\n" + ) + node.query("SYSTEM FLUSH LOGS") + kill_duration_ms = int( + node.query( + f"SELECT query_duration_ms FROM system.query_log WHERE query_kind='KillQuery' AND query LIKE '%{backup_id}%' AND type='QueryFinish'" + ) + ) + assert kill_duration_ms < 2000 # Query must be cancelled quickly + + +# Start restoring from a backup. +def start_restore(restore_id, backup_id): + node.query( + f"RESTORE TABLE tbl FROM {get_backup_name(backup_id)} SETTINGS id='{restore_id}' ASYNC" + ) + + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{restore_id}'") + == "RESTORING\n" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Restore' AND query LIKE '%{restore_id}%'" + ) + == "1\n" + ) + + +# Wait for the restore operation to be completed. +def wait_restore(restore_id): + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id='{restore_id}'", + "RESTORED", + retry_count=60, + sleep_time=5, + ) + + restore_duration = int( + node.query( + f"SELECT end_time - start_time FROM system.backups WHERE id='{restore_id}'" + ) + ) + assert ( + restore_duration >= 3 + ) # Restore is not expected to be too quick in this test. + + +# Cancel the specified restore operation. 
+def cancel_restore(restore_id): + node.query( + f"KILL QUERY WHERE query_kind='Restore' AND query LIKE '%{restore_id}%' SYNC" + ) + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{restore_id}'") + == "RESTORE_CANCELLED\n" + ) + expected_error = "QUERY_WAS_CANCELLED" + assert expected_error in node.query( + f"SELECT error FROM system.backups WHERE id='{restore_id}'" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Restore' AND query LIKE '%{restore_id}%'" + ) + == "0\n" + ) + node.query("SYSTEM FLUSH LOGS") + kill_duration_ms = int( + node.query( + f"SELECT query_duration_ms FROM system.query_log WHERE query_kind='KillQuery' AND query LIKE '%{restore_id}%' AND type='QueryFinish'" + ) + ) + assert kill_duration_ms < 2000 # Query must be cancelled quickly + + +# Test that BACKUP and RESTORE operations can be cancelled with KILL QUERY. +def test_cancel_backup(): + # We use partitioning so backups would contain more files. + node.query( + "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%5" + ) + + node.query(f"INSERT INTO tbl SELECT number FROM numbers(500)") + + try_backup_id_1 = uuid.uuid4().hex + start_backup(try_backup_id_1) + cancel_backup(try_backup_id_1) + + backup_id = uuid.uuid4().hex + start_backup(backup_id) + wait_backup(backup_id) + + node.query(f"DROP TABLE tbl SYNC") + + try_restore_id_1 = uuid.uuid4().hex + start_restore(try_restore_id_1, backup_id) + cancel_restore(try_restore_id_1) + + node.query(f"DROP TABLE tbl SYNC") + + restore_id = uuid.uuid4().hex + start_restore(restore_id, backup_id) + wait_restore(restore_id) + + +# Test that shutdown cancels a running backup and doesn't wait until it finishes. +def test_shutdown_cancel_backup(): + node.query( + "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%5" + ) + + node.query(f"INSERT INTO tbl SELECT number FROM numbers(500)") + + backup_id = uuid.uuid4().hex + start_backup(backup_id) + + node.restart_clickhouse() # Must cancel the backup. + + # The information about this cancelled backup must be stored in system.backup_log + assert node.query( + f"SELECT status FROM system.backup_log WHERE id='{backup_id}' ORDER BY status" + ) == TSV(["CREATING_BACKUP", "BACKUP_CANCELLED"]) + + # The table can't be restored from this backup. 
+ expected_error = "Backup .* not found" + node.query("DROP TABLE tbl SYNC") + assert re.search( + expected_error, + node.query_and_get_error( + f"RESTORE TABLE tbl FROM {get_backup_name(backup_id)}" + ), + ) diff --git a/tests/integration/test_backup_restore_new/test_shutdown_wait_backup.py b/tests/integration/test_backup_restore_new/test_shutdown_wait_backup.py new file mode 100644 index 00000000000..326e0c5da6c --- /dev/null +++ b/tests/integration/test_backup_restore_new/test_shutdown_wait_backup.py @@ -0,0 +1,82 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV, assert_eq_with_retry +import uuid + + +cluster = ClickHouseCluster(__file__) + +main_configs = [ + "configs/backups_disk.xml", + "configs/slow_backups.xml", +] + +node = cluster.add_instance( + "node", + main_configs=main_configs, + external_dirs=["/backups/"], + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def drop_after_test(): + try: + yield + finally: + node.query("DROP TABLE IF EXISTS tbl SYNC") + + +# Generate the backup name. +def get_backup_name(backup_id): + return f"Disk('backups', '{backup_id}')" + + +# Start making a backup asynchronously. +def start_backup(backup_id): + node.query( + f"BACKUP TABLE tbl TO {get_backup_name(backup_id)} SETTINGS id='{backup_id}' ASYNC" + ) + + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{backup_id}'") + == "CREATING_BACKUP\n" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Backup' AND query LIKE '%{backup_id}%'" + ) + == "1\n" + ) + + +# Test that shutdown doesn't cancel a running backup and waits until it finishes. +def test_shutdown_wait_backup(): + node.query( + "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%5" + ) + + node.query(f"INSERT INTO tbl SELECT number FROM numbers(500)") + + backup_id = uuid.uuid4().hex + start_backup(backup_id) + + node.restart_clickhouse() # Must wait for the backup. + + # The information about this backup must be stored in system.backup_log + assert node.query( + f"SELECT status FROM system.backup_log WHERE id='{backup_id}' ORDER BY status" + ) == TSV(["CREATING_BACKUP", "BACKUP_CREATED"]) + + # The table can be restored from this backup. 
+ node.query("DROP TABLE tbl SYNC") + node.query(f"RESTORE TABLE tbl FROM {get_backup_name(backup_id)}") diff --git a/tests/integration/test_filesystem_cache/config.d/remove_filesystem_caches_path.xml b/tests/integration/test_filesystem_cache/config.d/remove_filesystem_caches_path.xml new file mode 100644 index 00000000000..57417af1a39 --- /dev/null +++ b/tests/integration/test_filesystem_cache/config.d/remove_filesystem_caches_path.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index ab1bc4e4344..eb5f896f7a9 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -27,6 +27,14 @@ def cluster(): "config.d/storage_conf_2.xml", ], ) + cluster.add_instance( + "node_no_filesystem_caches_path", + main_configs=[ + "config.d/storage_conf.xml", + "config.d/remove_filesystem_caches_path.xml", + ], + stay_alive=True, + ) logging.info("Starting cluster...") cluster.start() @@ -194,3 +202,124 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): ).strip() == f"cache1\t{size}\ncache2\t{size}" ) + + +def test_custom_cached_disk(cluster): + node = cluster.instances["node_no_filesystem_caches_path"] + + assert "Cannot create cached custom disk without" in node.query_and_get_error( + f""" + DROP TABLE IF EXISTS test SYNC; + CREATE TABLE test (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, path = 'kek', max_size = 1, disk = 'hdd_blob'); + """ + ) + + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + /var/lib/clickhouse/filesystem_caches/ + + " > /etc/clickhouse-server/config.d/filesystem_caches_path.xml + """, + ] + ) + node.restart_clickhouse() + + node.query( + f""" + CREATE TABLE test (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, name = 'custom_cached', path = 'kek', max_size = 1, disk = 'hdd_blob'); + """ + ) + + assert ( + "/var/lib/clickhouse/filesystem_caches/kek" + == node.query( + "SELECT cache_path FROM system.disks WHERE name = 'custom_cached'" + ).strip() + ) + + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + /var/lib/clickhouse/custom_caches/ + + " > /etc/clickhouse-server/config.d/custom_filesystem_caches_path.xml + """, + ] + ) + node.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/remove_filesystem_caches_path.xml", + ] + ) + node.restart_clickhouse() + + node.query( + f""" + CREATE TABLE test2 (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, name = 'custom_cached2', path = 'kek2', max_size = 1, disk = 'hdd_blob'); + """ + ) + + assert ( + "/var/lib/clickhouse/custom_caches/kek2" + == node.query( + "SELECT cache_path FROM system.disks WHERE name = 'custom_cached2'" + ).strip() + ) + + node.exec_in_container( + ["bash", "-c", "rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml"] + ) + node.restart_clickhouse() + + node.query( + f""" + CREATE TABLE test3 (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, name = 'custom_cached3', path = 'kek3', max_size = 1, disk = 'hdd_blob'); + """ + ) + + assert ( + "/var/lib/clickhouse/custom_caches/kek3" + == node.query( + "SELECT cache_path FROM system.disks WHERE name = 'custom_cached3'" + ).strip() + ) + + assert "Filesystem cache path must lie inside" in node.query_and_get_error( + f""" + CREATE TABLE test4 (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS 
disk = disk(type = cache, name = 'custom_cached4', path = '/kek4', max_size = 1, disk = 'hdd_blob'); + """ + ) + + node.query( + f""" + CREATE TABLE test4 (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, name = 'custom_cached4', path = '/var/lib/clickhouse/custom_caches/kek4', max_size = 1, disk = 'hdd_blob'); + """ + ) + + assert ( + "/var/lib/clickhouse/custom_caches/kek4" + == node.query( + "SELECT cache_path FROM system.disks WHERE name = 'custom_cached4'" + ).strip() + ) diff --git a/tests/integration/test_parallel_replicas_custom_key/test.py b/tests/integration/test_parallel_replicas_custom_key/test.py index baac2661506..c646a678512 100644 --- a/tests/integration/test_parallel_replicas_custom_key/test.py +++ b/tests/integration/test_parallel_replicas_custom_key/test.py @@ -87,8 +87,3 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter node.contains_in_log("Processing query on a replica using custom_key") for node in nodes ) - else: - # we first transform all replicas into shards and then append for each shard filter - assert n1.contains_in_log( - "Single shard cluster used with custom_key, transforming replicas into virtual shards" - ) diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/__init__.py b/tests/integration/test_parallel_replicas_custom_key_failover/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_custom_key_failover/configs/remote_servers.xml new file mode 100644 index 00000000000..da4e2517a44 --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key_failover/configs/remote_servers.xml @@ -0,0 +1,26 @@ + + + + + false + + n1 + 9000 + + + n2 + 1234 + + + n3 + 9000 + + + n4 + 1234 + + + + + + diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py new file mode 100644 index 00000000000..d7e73208798 --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -0,0 +1,122 @@ +import pytest +import uuid +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "n1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +node3 = cluster.add_instance( + "n3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) + +nodes = [node1, node3] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def create_tables(cluster, table_name): + node1.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + node3.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + + node1.query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1') ORDER BY (key)" + ) + node3.query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" + ) + + # populate data + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000, 1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(2000, 
1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(3000, 1000)" + ) + node3.query(f"SYSTEM SYNC REPLICA {table_name}") + + +@pytest.mark.parametrize("use_hedged_requests", [1, 0]) +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +@pytest.mark.parametrize("prefer_localhost_replica", [0, 1]) +def test_parallel_replicas_custom_key_failover( + start_cluster, + use_hedged_requests, + custom_key, + filter_type, + prefer_localhost_replica, +): + cluster_name = "test_single_shard_multiple_replicas" + table = "test_table" + + create_tables(cluster_name, table) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t1000\n" + + log_comment = uuid.uuid4() + assert ( + node1.query( + f"SELECT key, count() FROM cluster('{cluster_name}', currentDatabase(), test_table) GROUP BY key ORDER BY key", + settings={ + "log_comment": log_comment, + "prefer_localhost_replica": prefer_localhost_replica, + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "use_hedged_requests": use_hedged_requests, + # avoid considering replica delay on connection choice + # otherwise connection can be not distributed evenly among available nodes + # and so custom key secondary queries (we check it bellow) + "max_replica_delay_for_distributed_queries": 0, + }, + ) + == expected_result + ) + + for node in nodes: + node.query("system flush logs") + + # the subqueries should be spread over available nodes + query_id = node1.query( + f"SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '{log_comment}' AND type = 'QueryFinish' AND initial_query_id = query_id" + ) + assert query_id != "" + query_id = query_id[:-1] + + if prefer_localhost_replica == 0: + assert ( + node1.query( + f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + ) + == "subqueries\t4\n" + ) + + # currently this assert is flaky with asan and tsan builds, disable the assert in such cases for now + # will be investigated separately + if ( + not node1.is_built_with_thread_sanitizer() + and not node1.is_built_with_address_sanitizer() + ): + assert ( + node1.query( + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" + ) + == "n1\t3\nn3\t2\n" + ) diff --git a/tests/integration/test_parallel_replicas_custom_key_load_balancing/__init__.py b/tests/integration/test_parallel_replicas_custom_key_load_balancing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_parallel_replicas_custom_key_load_balancing/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_custom_key_load_balancing/configs/remote_servers.xml new file mode 100644 index 00000000000..8b050571c3f --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key_load_balancing/configs/remote_servers.xml @@ -0,0 +1,26 @@ + + + + + false + + n1 + 9000 + + + n2 + 9000 + + + n3 + 9000 + + + n4 + 9000 + + + + + + diff --git a/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py 
b/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py
new file mode 100644
index 00000000000..b9d4d029703
--- /dev/null
+++ b/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py
@@ -0,0 +1,118 @@
+import pytest
+import uuid
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance(
+    "n1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
+)
+node2 = cluster.add_instance(
+    "n2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
+)
+node3 = cluster.add_instance(
+    "n3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
+)
+node4 = cluster.add_instance(
+    "n4", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
+)
+
+nodes = [node1, node2, node3, node4]
+
+
+@pytest.fixture(scope="module", autouse=True)
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def create_tables(table_name):
+    for i in range(0, 4):
+        nodes[i].query(f"DROP TABLE IF EXISTS {table_name} SYNC")
+        nodes[i].query(
+            f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r{i+1}') ORDER BY (key)"
+        )
+
+    # populate data
+    node1.query(
+        f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000)"
+    )
+    node1.query(
+        f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000, 1000)"
+    )
+    node1.query(
+        f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(2000, 1000)"
+    )
+    node1.query(
+        f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(3000, 1000)"
+    )
+    node2.query(f"SYSTEM SYNC REPLICA {table_name}")
+    node3.query(f"SYSTEM SYNC REPLICA {table_name}")
+    node4.query(f"SYSTEM SYNC REPLICA {table_name}")
+
+
+@pytest.mark.parametrize("use_hedged_requests", [1, 0])
+@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"])
+@pytest.mark.parametrize("filter_type", ["default", "range"])
+def test_parallel_replicas_custom_key_load_balancing(
+    start_cluster,
+    use_hedged_requests,
+    custom_key,
+    filter_type,
+):
+    cluster_name = "test_single_shard_multiple_replicas"
+    table = "test_table"
+
+    create_tables(table)
+
+    expected_result = ""
+    for i in range(4):
+        expected_result += f"{i}\t1000\n"
+
+    log_comment = uuid.uuid4()
+    assert (
+        node1.query(
+            f"SELECT key, count() FROM cluster('{cluster_name}', currentDatabase(), test_table) GROUP BY key ORDER BY key",
+            settings={
+                "log_comment": log_comment,
+                "prefer_localhost_replica": 0,
+                "max_parallel_replicas": 4,
+                "parallel_replicas_custom_key": custom_key,
+                "parallel_replicas_custom_key_filter_type": filter_type,
+                "use_hedged_requests": use_hedged_requests,
+                # avoid considering replica delay on connection choice,
+                # otherwise connections may not be distributed evenly among available nodes,
+                # and neither would the custom key secondary queries (we check it below)
+                "max_replica_delay_for_distributed_queries": 0,
+            },
+        )
+        == expected_result
+    )
+
+    for node in nodes:
+        node.query("system flush logs")
+
+    # the subqueries should be spread over available nodes
+    query_id = node1.query(
+        f"SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '{log_comment}' AND type = 'QueryFinish' AND initial_query_id = query_id"
+    )
+    assert query_id != ""
+    query_id = query_id[:-1]
+
+    assert (
+        node1.query(
+            f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name},
system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + ) + == "subqueries\t4\n" + ) + + # check queries per node + assert ( + node1.query( + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" + ) + == "n1\t2\nn2\t1\nn3\t1\nn4\t1\n" + ) diff --git a/tests/integration/test_prometheus_endpoint/test.py b/tests/integration/test_prometheus_endpoint/test.py index 4eedc84b6c4..f140ebdfbe7 100644 --- a/tests/integration/test_prometheus_endpoint/test.py +++ b/tests/integration/test_prometheus_endpoint/test.py @@ -40,6 +40,8 @@ def get_and_check_metrics(retries): response = requests.get( "http://{host}:{port}/metrics".format(host=node.ip_address, port=8001), allow_redirects=False, + # less then default keep-alive timeout (10 seconds) + timeout=5, ) if response.status_code != 200: diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 30ee14c4ba9..dea1ea49851 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -5008,6 +5008,15 @@ def test_multiple_read_in_materialized_views(kafka_cluster, max_retries=15): ) assert res == expected_result + kafka_delete_topic(admin_client, topic) + instance.query( + f""" + DROP TABLE test.kafka_multiple_read_input; + DROP TABLE test.kafka_multiple_read_table; + DROP TABLE test.kafka_multiple_read_mv; + """ + ) + if __name__ == "__main__": cluster.start() diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index 3dc8cbd70e6..2bfcf2c9157 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -21,6 +21,7 @@ + INSERT INTO vecs_{element_type} @@ -28,46 +29,27 @@ SELECT number AS n, [ - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7), - rand(n*10+8), - rand(n*10+9) + rand(n*10), rand(n*10+1), rand(n*10+2), rand(n*10+3), rand(n*10+4), rand(n*10+5), rand(n*10+6), rand(n*10+7), rand(n*10+8), rand(n*10+9), + rand(n*10+10), rand(n*10+11), rand(n*10+12), rand(n*10+13), rand(n*10+14), rand(n*10+15), rand(n*10+16), rand(n*10+17), rand(n*10+18), rand(n*10+19), + rand(n*10+20), rand(n*10+21), rand(n*10+22), rand(n*10+23), rand(n*10+24), rand(n*10+25), rand(n*10+26), rand(n*10+27), rand(n*10+28), rand(n*10+29), + rand(n*10+30), rand(n*10+31), rand(n*10+32), rand(n*10+33), rand(n*10+34), rand(n*10+35), rand(n*10+36), rand(n*10+37), rand(n*10+38), rand(n*10+39), + rand(n*10+40), rand(n*10+41), rand(n*10+42), rand(n*10+43), rand(n*10+44), rand(n*10+45), rand(n*10+46), rand(n*10+47), rand(n*10+48), rand(n*10+49), + rand(n*10+50), rand(n*10+51), rand(n*10+52), rand(n*10+53), rand(n*10+54), rand(n*10+55), rand(n*10+56), rand(n*10+57), rand(n*10+58), rand(n*10+59), + rand(n*10+60), rand(n*10+61), rand(n*10+62), rand(n*10+63), rand(n*10+64), rand(n*10+65), rand(n*10+66), rand(n*10+67), rand(n*10+68), rand(n*10+69), + rand(n*10+70), rand(n*10+71), rand(n*10+72), rand(n*10+73), rand(n*10+74), rand(n*10+75), rand(n*10+76), rand(n*10+77), rand(n*10+78), rand(n*10+79), + rand(n*10+80), rand(n*10+81), rand(n*10+82), rand(n*10+83), rand(n*10+84), rand(n*10+85), rand(n*10+86), rand(n*10+87), rand(n*10+88), rand(n*10+89), + rand(n*10+90), rand(n*10+91), rand(n*10+92), rand(n*10+93), 
rand(n*10+94), rand(n*10+95), rand(n*10+96), rand(n*10+97), rand(n*10+98), rand(n*10+99), + rand(n*10+100), rand(n*10+101), rand(n*10+102), rand(n*10+103), rand(n*10+104), rand(n*10+105), rand(n*10+106), rand(n*10+107), rand(n*10+108), rand(n*10+109), + rand(n*10+110), rand(n*10+111), rand(n*10+112), rand(n*10+113), rand(n*10+114), rand(n*10+115), rand(n*10+116), rand(n*10+117), rand(n*10+118), rand(n*10+119), + rand(n*10+120), rand(n*10+121), rand(n*10+122), rand(n*10+123), rand(n*10+124), rand(n*10+125), rand(n*10+126), rand(n*10+127), rand(n*10+128), rand(n*10+129), + rand(n*10+130), rand(n*10+131), rand(n*10+132), rand(n*10+133), rand(n*10+134), rand(n*10+135), rand(n*10+136), rand(n*10+137), rand(n*10+138), rand(n*10+139), + rand(n*10+140), rand(n*10+141), rand(n*10+142), rand(n*10+143), rand(n*10+144), rand(n*10+145), rand(n*10+146), rand(n*10+147), rand(n*10+148), rand(n*10+149) ] AS v FROM system.numbers - LIMIT 10000000 + LIMIT 8000000 ); - - - - CREATE TABLE tuples_{element_type} ( - t Tuple( - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type} - ) - ) ENGINE=Memory; - - - - INSERT INTO tuples_{element_type} - SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_{element_type}; - - 1 @@ -84,17 +66,11 @@ - - SELECT sum(dist) FROM (SELECT {norm}Norm(t) AS dist FROM tuples_{element_type}) - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, t) AS dist FROM tuples_{element_type}) - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type}) - SELECT sum(dist) FROM (SELECT {norm}Norm(v) AS dist FROM vecs_{element_type}) WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, v) AS dist FROM vecs_{element_type}) WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, v) AS dist FROM vecs_{element_type}) DROP TABLE vecs_{element_type} - DROP TABLE tuples_{element_type} diff --git a/tests/performance/norm_distance_float.xml b/tests/performance/norm_distance_float.xml deleted file mode 100644 index e71d8eb6281..00000000000 --- a/tests/performance/norm_distance_float.xml +++ /dev/null @@ -1,95 +0,0 @@ - - - - - element_type - - Float32 - Float64 - - - - - - CREATE TABLE vecs_{element_type} ( - v Array({element_type}) - ) ENGINE=Memory; - - - - - - INSERT INTO vecs_{element_type} - SELECT v FROM ( - SELECT - number AS n, - [ - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7), - rand(n*10+8), - rand(n*10+9), - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7), - rand(n*10+8), - rand(n*10+9), - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7), - rand(n*10+8), - rand(n*10+9), - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7) - ] AS v - FROM system.numbers - LIMIT 10000000 - ); - - - - 1 - - - - - - norm - - L1 - L2 - L2Squared - Linf - - - - - - SELECT sum(dist) FROM (SELECT {norm}Norm(v) AS dist FROM vecs_{element_type}) - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, v) AS 
dist FROM vecs_{element_type}) - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, v) AS dist FROM vecs_{element_type}) - - DROP TABLE vecs_{element_type} - - diff --git a/tests/performance/uniq_without_key_dist.xml b/tests/performance/uniq_without_key_dist.xml new file mode 100644 index 00000000000..600b378a7f7 --- /dev/null +++ b/tests/performance/uniq_without_key_dist.xml @@ -0,0 +1,22 @@ + + + + uniq_keys + + 100000 + 250000 + 500000 + 1000000 + 5000000 + + + + + create table t_{uniq_keys}(a UInt64) engine=MergeTree order by tuple() + + insert into t_{uniq_keys} select number % {uniq_keys} from numbers_mt(5e7) + + SELECT uniqExact(a) FROM remote('127.0.0.{{1,2}}', default, t_{uniq_keys}) SETTINGS max_threads=5 + + drop table t_{uniq_keys} + diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference index ea04f155f24..1f7d5d44df4 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference @@ -1,8 +1,8 @@ SELECT - sum(n + 1), - sum(1 + n), - sum(n - 1), - sum(1 - n) + sum(n) + (1 * count(n)), + (1 * count(n)) + sum(n), + sum(n) - (1 * count(n)), + (1 * count(n)) - sum(n) FROM ( SELECT number AS n @@ -59,10 +59,10 @@ FROM FROM numbers(10) ) SELECT - sum(n + -1), - sum(-1 + n), - sum(n - -1), - sum(-1 - n) + sum(n) + (-1 * count(n)), + (-1 * count(n)) + sum(n), + sum(n) - (-1 * count(n)), + (-1 * count(n)) - sum(n) FROM ( SELECT number AS n @@ -418,7 +418,7 @@ FROM SELECT number AS n FROM numbers(10) ) -SELECT ((sum(n + 1) + sum(1 + n)) + sum(n - 1)) + sum(1 - n) +SELECT (((sum(n) + (1 * count(n))) + ((1 * count(n)) + sum(n))) + (sum(n) - (1 * count(n)))) + ((1 * count(n)) - sum(n)) FROM ( SELECT number AS n diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.reference b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.reference index 9e0d871041b..54448ba3b68 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.reference +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.reference @@ -8,3 +8,21 @@ FROM WHERE (a > 0) AND (b > 0) HAVING c > 0 2 +SELECT min(n) + 1 AS c +FROM +( + SELECT number AS n + FROM numbers(10) + WHERE (n + 1) > 0 +) +WHERE ((n + 1) AS a) > 0 +HAVING c > 0 +1 +SELECT min(n) + 1 AS c +FROM +( + SELECT number AS n + FROM numbers(10) +) +HAVING c > 0 +1 diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql index 242a253e67c..7c27994aca8 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql @@ -3,3 +3,9 @@ SET convert_query_to_cnf = 0; explain syntax select min((n as a) + (1 as b)) c from (select number n from numbers(10)) where a > 0 and b > 0 having c > 0; select min((n as a) + (1 as b)) c from (select number n from numbers(10)) where a > 0 and b > 0 having c > 0; + +explain syntax select min((n + 1) as a) c from (select number n from numbers(10)) where a > 0 having c > 0; +select min((n + 
1) as a) c from (select number n from numbers(10)) where a > 0 having c > 0; + +explain syntax select min(n + 1) as c from (select number n from numbers(10)) having c > 0; +select min(n + 1) c from (select number n from numbers(10)) having c > 0; diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.reference b/tests/queries/0_stateless/01361_fover_remote_num_tries.reference index 64bb6b746dc..209e3ef4b62 100644 --- a/tests/queries/0_stateless/01361_fover_remote_num_tries.reference +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.reference @@ -1 +1 @@ -30 +20 diff --git a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh index f07ffc02e4f..9d9c6b920b6 100755 --- a/tests/queries/0_stateless/01361_fover_remote_num_tries.sh +++ b/tests/queries/0_stateless/01361_fover_remote_num_tries.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'Timeout exceeded while connecting to socket|Network is unreachable|Timeout: connect timed out' | wc -l +$CLICKHOUSE_CLIENT --connections_with_failover_max_tries 10 --connect_timeout_with_failover_ms 1 --query "SELECT hostName() FROM remote('128.1.2.3', default.tmp)" 2>&1 | grep -o -P 'Timeout exceeded while connecting to socket|Network is unreachable|Timeout: connect timed out' | wc -l diff --git a/tests/queries/0_stateless/01470_columns_transformers.reference b/tests/queries/0_stateless/01470_columns_transformers.reference index 8fa86582018..8eab5a16b8b 100644 --- a/tests/queries/0_stateless/01470_columns_transformers.reference +++ b/tests/queries/0_stateless/01470_columns_transformers.reference @@ -49,10 +49,43 @@ SELECT any(toDate(k)) FROM columns_transformers AS a SELECT - sum(i + 1 AS i), + sum(i) + (1 * count(i)), sum(j), sum(k) FROM columns_transformers +SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 2) + ExpressionList (children 1) + Asterisk (children 1) + ColumnsTransformerList (children 2) + ColumnsReplaceTransformer (children 1) + ColumnsReplaceTransformer::Replacement (children 1) + Function plus (children 1) + ExpressionList (children 2) + Identifier i + Literal UInt64_1 + ColumnsApplyTransformer + TablesInSelectQuery (children 1) + TablesInSelectQueryElement (children 1) + TableExpression (children 1) + TableIdentifier columns_transformers +SELECT sum(i) + (1 * count(i)) +FROM columns_transformers +SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 2) + ExpressionList (children 1) + Function sum (children 1) + ExpressionList (children 1) + Function plus (alias m) (children 1) + ExpressionList (children 2) + Identifier i + Literal UInt64_1 + TablesInSelectQuery (children 1) + TablesInSelectQueryElement (children 1) + TableExpression (children 1) + TableIdentifier columns_transformers SELECT avg(i) + 1, avg(j) + 2, diff --git a/tests/queries/0_stateless/01470_columns_transformers.sql b/tests/queries/0_stateless/01470_columns_transformers.sql index 8840ce3f3b5..1490dabdcec 100644 --- a/tests/queries/0_stateless/01470_columns_transformers.sql +++ b/tests/queries/0_stateless/01470_columns_transformers.sql @@ -35,6 +35,9 @@ EXPLAIN SYNTAX SELECT * EXCEPT(i) APPLY(sum) from columns_transformers; EXPLAIN SYNTAX SELECT columns_transformers.* EXCEPT(j) 
APPLY(avg) from columns_transformers; EXPLAIN SYNTAX SELECT a.* APPLY(toDate) EXCEPT(i, j) APPLY(any) from columns_transformers a; EXPLAIN SYNTAX SELECT * REPLACE(i + 1 AS i) APPLY(sum) from columns_transformers; +EXPLAIN AST SELECT * REPLACE(i + 1 AS i) APPLY(sum) from columns_transformers; +EXPLAIN SYNTAX SELECT sum(i + 1 AS m) from columns_transformers; +EXPLAIN AST SELECT sum(i + 1 AS m) from columns_transformers; EXPLAIN SYNTAX SELECT columns_transformers.* REPLACE(j + 2 AS j, i + 1 AS i) APPLY(avg) from columns_transformers; EXPLAIN SYNTAX SELECT a.* APPLY(toDate) REPLACE(i + 1 AS i) APPLY(any) from columns_transformers a; diff --git a/tests/queries/0_stateless/02252_jit_profile_events.sql b/tests/queries/0_stateless/02252_jit_profile_events.sql index eca3c06f9f1..fbd6040c21c 100644 --- a/tests/queries/0_stateless/02252_jit_profile_events.sql +++ b/tests/queries/0_stateless/02252_jit_profile_events.sql @@ -19,13 +19,13 @@ SELECT ProfileEvents['CompileFunction'] FROM system.query_log WHERE SET compile_aggregate_expressions = 1; SET min_count_to_compile_aggregate_expression = 0; -SELECT sum(number), sum(number + 1), sum(number + 2) FROM numbers(1) GROUP BY number; +SELECT avg(number), avg(number + 1), avg(number + 2) FROM numbers(1) GROUP BY number; SYSTEM FLUSH LOGS; SELECT ProfileEvents['CompileFunction'] FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' - AND query == 'SELECT sum(number), sum(number + 1), sum(number + 2) FROM numbers(1) GROUP BY number;' + AND query == 'SELECT avg(number), avg(number + 1), avg(number + 2) FROM numbers(1) GROUP BY number;' AND event_date >= yesterday() AND event_time > now() - interval 10 minute LIMIT 1; diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference index ec4928bc325..608fac626fa 100644 --- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference @@ -20,21 +20,9 @@ 23 Sx>b:^UG XpedE)Q: 7433019734386307503 29 2j&S)ba?XG QuQj 17163829389637435056 3 UlI+1 14144472852965836438 -0 PJFiUe#J2O _s\' 14427935816175499794 -1 >T%O ,z< 17537932797009027240 -12 D[6,P #}Lmb[ ZzU 6394957109822140795 -18 $_N- 24422838680427462 -2 bX?}ix [ Ny]2 G 16242612901291874718 -20 VE] Y 15120036904703536841 -22 Ti~3)N)< A!( 3 18361093572663329113 -23 Sx>b:^UG XpedE)Q: 7433019734386307503 -29 2j&S)ba?XG QuQj 17163829389637435056 -3 UlI+1 14144472852965836438 =============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE =============== -0 3 SELECT `__table1`.`key` AS `key`, `__table1`.`value1` AS `value1`, `__table1`.`value2` AS `value2`, toUInt64(min(`__table1`.`time`)) AS `start_ts` FROM `default`.`join_inner_table` AS `__table1` PREWHERE (`__table1`.`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`__table1`.`number` > 1610517366120) GROUP BY `__table1`.`key`, `__table1`.`value1`, `__table1`.`value2` ORDER BY `__table1`.`key` ASC, `__table1`.`value1` ASC, `__table1`.`value2` ASC LIMIT _CAST(10, \'UInt64\') SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer = 1 0 3 SELECT `key`, `value1`, `value2`, toUInt64(min(`time`)) AS `start_ts` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` ORDER BY `key` ASC, `value1` ASC, `value2` ASC LIMIT 10 -1 1 -- Parallel 
inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; -1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1; +1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1; =============== OUTER QUERY (NO PARALLEL) =============== >T%O ,z< 10 NQTpY# W\\Xx4 10 @@ -51,16 +39,6 @@ U c 10 UlI+1 10 bX?}ix [ Ny]2 G 10 tT%O ,z< 10 -NQTpY# W\\Xx4 10 -PJFiUe#J2O _s\' 10 -U c 10 -UlI+1 10 -bX?}ix [ Ny]2 G 10 -t 1610517366120) GROUP BY `__table1`.`key`, `__table1`.`value1`, `__table1`.`value2` -0 3 SELECT `__table2`.`value1` AS `value1`, `__table2`.`value2` AS `value2`, count() AS `count` FROM `default`.`join_outer_table` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` USING (`key`) GROUP BY `__table1`.`key`, `__table2`.`value1`, `__table2`.`value2` 0 3 SELECT `key`, `value1`, `value2` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` -0 3 SELECT `value1`, `value2`, count() AS `count` FROM `default`.`join_outer_table` ALL INNER JOIN `_data_` USING (`key`) GROUP BY `key`, `value1`, `value2` -1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; -1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1; +0 3 SELECT `value1`, `value2`, count() AS `count` FROM `default`.`join_outer_table` ALL INNER JOIN `_data_7105554115296635472_12427301373021079614` USING (`key`) GROUP BY `key`, `value1`, `value2` +1 1 -- Parallel full query\nSELECT\n value1,\n 
value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1; diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql index 7693d0da295..a117378b0bf 100644 --- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql @@ -21,6 +21,7 @@ SELECT * FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2) LIMIT 100; +SET allow_experimental_analyzer = 0; SET max_parallel_replicas = 3; SET prefer_localhost_replica = 1; SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; @@ -38,18 +39,6 @@ FROM join_inner_table GROUP BY key, value1, value2 ORDER BY key, value1, value2 LIMIT 10; --- settings allow_experimental_analyzer=0; - --- SELECT --- key, --- value1, --- value2, --- toUInt64(min(time)) AS start_ts --- FROM join_inner_table --- PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) --- GROUP BY key, value1, value2 --- ORDER BY key, value1, value2 --- LIMIT 10 settings allow_experimental_analyzer=1; SELECT '=============== INNER QUERY (PARALLEL) ==============='; @@ -64,31 +53,18 @@ PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1 GROUP BY key, value1, value2 ORDER BY key, value1, value2 LIMIT 10 -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; - --- Parallel inner query alone -SELECT - key, - value1, - value2, - toUInt64(min(time)) AS start_ts -FROM join_inner_table -PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) -GROUP BY key, value1, value2 -ORDER BY key, value1, value2 -LIMIT 10 -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1; +SETTINGS allow_experimental_parallel_reading_from_replicas = 1; SELECT '=============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE ==============='; SYSTEM FLUSH LOGS; -- There should be 4 queries. 
The main query as received by the initiator and the 3 equal queries sent to each replica -SELECT is_initial_query, count() as c, replaceRegexpAll(query, '_data_(\d+)_(\d+)', '_data_') as query +SELECT is_initial_query, count() as c, query, FROM system.query_log WHERE event_date >= yesterday() AND type = 'QueryFinish' - AND initial_query_id IN + AND initial_query_id = ( SELECT query_id FROM system.query_log @@ -184,48 +160,18 @@ FROM ) GROUP BY value1, value2 ORDER BY value1, value2 -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; - --- Parallel full query -SELECT - value1, - value2, - avg(count) AS avg -FROM - ( - SELECT - key, - value1, - value2, - count() AS count - FROM join_outer_table - INNER JOIN - ( - SELECT - key, - value1, - value2, - toUInt64(min(time)) AS start_ts - FROM join_inner_table - PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) - GROUP BY key, value1, value2 - ) USING (key) - GROUP BY key, value1, value2 - ) -GROUP BY value1, value2 -ORDER BY value1, value2 -SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1; +SETTINGS allow_experimental_parallel_reading_from_replicas = 1; SYSTEM FLUSH LOGS; -- There should be 7 queries. The main query as received by the initiator, the 3 equal queries to execute the subquery -- in the inner join and the 3 queries executing the whole query (but replacing the subquery with a temp table) -SELECT is_initial_query, count() as c, replaceRegexpAll(query, '_data_(\d+)_(\d+)', '_data_') as query +SELECT is_initial_query, count() as c, query, FROM system.query_log WHERE event_date >= yesterday() AND type = 'QueryFinish' - AND initial_query_id IN + AND initial_query_id = ( SELECT query_id FROM system.query_log diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference index 521e3e2edbc..802d55f8ae3 100644 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference @@ -1,4 +1,6 @@ +02784_automatic_parallel_replicas_join-default_simple_join_10M_pure 0 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_10M_pure 1 estimated parallel replicas +02784_automatic_parallel_replicas_join-default_simple_join_5M_pure 0 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_5M_pure 2 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_1M_pure 1 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_1M_pure 10 estimated parallel replicas diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh index 1a74c3230c6..baeeb820da5 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh @@ -68,7 +68,7 @@ function run_query_with_pure_parallel_replicas () { --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ --parallel_replicas_min_number_of_rows_per_replica "$2" \ - |& grep "It is enough work for" | awk '{ print substr($7, 2, length($7) - 2) "\t" $20 " estimated parallel replicas" 
}' | sort -n -k2 -b | grep -Pv "\t0 estimated parallel replicas" + |& grep "It is enough work for" | awk '{ print substr($7, 2, length($7) - 2) "\t" $20 " estimated parallel replicas" }' } query_id_base="02784_automatic_parallel_replicas_join-$CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02891_array_shingles.reference b/tests/queries/0_stateless/02891_array_shingles.reference index 04f7bfa331f..00bd9f6bb41 100644 --- a/tests/queries/0_stateless/02891_array_shingles.reference +++ b/tests/queries/0_stateless/02891_array_shingles.reference @@ -1,41 +1,11 @@ -- negative tests -- const and non-const inputs -Row 1: -────── -arr: [1,2,3,4,5] -len: 1 -arrayShingles([1, 2, 3, 4, 5], 1): [[1],[2],[3],[4],[5]] -arrayShingles(materialize([1, 2, 3, 4, 5]), 1): [[1],[2],[3],[4],[5]] -Row 1: -────── -arr: [1,2,3,4,5] -len: 3 -arrayShingles([1, 2, 3, 4, 5], 3): [[1,2,3],[2,3,4],[3,4,5]] -arrayShingles(materialize([1, 2, 3, 4, 5]), 3): [[1,2,3],[2,3,4],[3,4,5]] -Row 1: -────── -arr: [1,2,3,4,5] -len: 5 -arrayShingles([1, 2, 3, 4, 5], 5): [[1,2,3,4,5]] -arrayShingles(materialize([1, 2, 3, 4, 5]), 5): [[1,2,3,4,5]] -Row 1: -────── -arr: ['ab','c','de','','hi'] -len: 1 -arrayShingles(['ab', 'c', 'de', '', 'hi'], 1): [['ab'],['c'],['de'],[''],['hi']] -arrayShingles(materialize(['ab', 'c', 'de', '', 'hi']), 1): [['ab'],['c'],['de'],[''],['hi']] -Row 1: -────── -arr: ['ab','c','de','','hi'] -len: 3 -arrayShingles(['ab', 'c', 'de', '', 'hi'], 3): [['ab','c','de'],['c','de',''],['de','','hi']] -arrayShingles(materialize(['ab', 'c', 'de', '', 'hi']), 3): [['ab','c','de'],['c','de',''],['de','','hi']] -Row 1: -────── -arr: ['ab','c','de','','hi'] -len: 5 -arrayShingles(['ab', 'c', 'de', '', 'hi'], 5): [['ab','c','de','','hi']] -arrayShingles(materialize(['ab', 'c', 'de', '', 'hi']), 5): [['ab','c','de','','hi']] +[1,2,3,4,5] 1 [[1],[2],[3],[4],[5]] [[1],[2],[3],[4],[5]] +[1,2,3,4,5] 3 [[1,2,3],[2,3,4],[3,4,5]] [[1,2,3],[2,3,4],[3,4,5]] +[1,2,3,4,5] 5 [[1,2,3,4,5]] [[1,2,3,4,5]] +['ab','c','de','','hi'] 1 [['ab'],['c'],['de'],[''],['hi']] [['ab'],['c'],['de'],[''],['hi']] +['ab','c','de','','hi'] 3 [['ab','c','de'],['c','de',''],['de','','hi']] [['ab','c','de'],['c','de',''],['de','','hi']] +['ab','c','de','','hi'] 5 [['ab','c','de','','hi']] [['ab','c','de','','hi']] -- special cases [[2],[1]] [[2],[1]] diff --git a/tests/queries/0_stateless/02891_array_shingles.sql b/tests/queries/0_stateless/02891_array_shingles.sql index f199a445a30..e2b5cde880f 100644 --- a/tests/queries/0_stateless/02891_array_shingles.sql +++ b/tests/queries/0_stateless/02891_array_shingles.sql @@ -10,13 +10,13 @@ SELECT arrayShingles([1, 2, 3, 4, 5], 6); -- { serverError BAD_ARGUMENTS } SELECT arrayShingles([], 1); -- { serverError BAD_ARGUMENTS } SELECT '-- const and non-const inputs'; -SELECT [1, 2, 3, 4, 5] AS arr, 1 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len) FORMAT Vertical; -SELECT [1, 2, 3, 4, 5] AS arr, 3 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len) FORMAT Vertical; -SELECT [1, 2 ,3, 4, 5] AS arr, 5 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len) FORMAT Vertical; +SELECT [1, 2, 3, 4, 5] AS arr, 1 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); +SELECT [1, 2, 3, 4, 5] AS arr, 3 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); +SELECT [1, 2 ,3, 4, 5] AS arr, 5 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); -SELECT ['ab', 'c', 'de', '', 'hi'] AS arr, 1 AS len, arrayShingles(arr, 
len), arrayShingles(materialize(arr), len) FORMAT Vertical; -SELECT ['ab', 'c', 'de', '', 'hi'] AS arr, 3 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len) FORMAT Vertical; -SELECT ['ab', 'c', 'de', '', 'hi'] AS arr, 5 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len) FORMAT Vertical; +SELECT ['ab', 'c', 'de', '', 'hi'] AS arr, 1 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); +SELECT ['ab', 'c', 'de', '', 'hi'] AS arr, 3 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); +SELECT ['ab', 'c', 'de', '', 'hi'] AS arr, 5 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); SELECT '-- special cases'; SELECT arrayShingles([toNullable(2), toNullable(1)], 1); diff --git a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql index 439b8b3f032..ec86a66c7dd 100644 --- a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql +++ b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql @@ -1,10 +1,10 @@ -- Tags: no-fasttest -- Query stops after timeout without an error -SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=13, timeout_overflow_mode='break' FORMAT Null; +SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='break' FORMAT Null; --- Query returns an error when runtime is estimated after 10 sec of execution -SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=13, timeout_overflow_mode='throw' FORMAT Null; -- { serverError TOO_SLOW } +-- Query returns an error when runtime is estimated after timeout_before_checking_execution_speed passed +SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, timeout_before_checking_execution_speed=1, max_estimated_execution_time=2, timeout_overflow_mode='throw' FORMAT Null; -- { serverError TOO_SLOW } -- Query returns timeout error before its full execution time is estimated -SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='throw' FORMAT Null; -- { serverError TIMEOUT_EXCEEDED } +SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, timeout_before_checking_execution_speed=1, max_execution_time=2, timeout_overflow_mode='throw' FORMAT Null; -- { serverError TIMEOUT_EXCEEDED } diff --git a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.reference b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.reference new file mode 100644 index 00000000000..2d97dd0e12e --- /dev/null +++ b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.reference @@ -0,0 +1,29 @@ +-- { echoOn } +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; +0 250 +1 250 +2 250 +3 250 +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='range'; +0 250 +1 250 +2 250 +3 250 +SET use_hedged_requests=0; +SELECT y, count() +FROM 
cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; +0 250 +1 250 +2 250 +3 250 diff --git a/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql new file mode 100644 index 00000000000..b9bc6974c47 --- /dev/null +++ b/tests/queries/0_stateless/02918_parallel_replicas_custom_key_unavailable_replica.sql @@ -0,0 +1,30 @@ +DROP TABLE IF EXISTS 02918_parallel_replicas; + +CREATE TABLE 02918_parallel_replicas (x String, y Int32) ENGINE = MergeTree ORDER BY cityHash64(x); + +INSERT INTO 02918_parallel_replicas SELECT toString(number), number % 4 FROM numbers(1000); + +SET prefer_localhost_replica=0; + +-- { echoOn } +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; + +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='range'; + +SET use_hedged_requests=0; +SELECT y, count() +FROM cluster(test_cluster_1_shard_3_replicas_1_unavailable, currentDatabase(), 02918_parallel_replicas) +GROUP BY y +ORDER BY y +SETTINGS max_parallel_replicas=3, parallel_replicas_custom_key='cityHash64(y)', parallel_replicas_custom_key_filter_type='default'; +-- { echoOff } + +DROP TABLE 02918_parallel_replicas; diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference new file mode 100644 index 00000000000..8f29910e9ae --- /dev/null +++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference @@ -0,0 +1,476 @@ +-- { echoOn } +Select sum(number + 1) from numbers(10); +55 +Select sum(1 + number) from numbers(10); +55 +Select sum(number - 1) from numbers(10); +35 +Select sum(1 - number) from numbers(10); +-35 +EXPLAIN SYNTAX (Select sum(number + 1) from numbers(10)); +SELECT sum(number) + (1 * count(number)) +FROM numbers(10) +EXPLAIN SYNTAX (Select sum(1 + number) from numbers(10)); +SELECT (1 * count(number)) + sum(number) +FROM numbers(10) +EXPLAIN SYNTAX (Select sum(number - 1) from numbers(10)); +SELECT sum(number) - (1 * count(number)) +FROM numbers(10) +EXPLAIN SYNTAX (Select sum(1 - number) from numbers(10)); +SELECT (1 * count(number)) - sum(number) +FROM numbers(10) +WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0); +\N +WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0); +0 +EXPLAIN SYNTAX (WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0)); +WITH CAST(\'1\', \'Nullable(UInt64)\') AS my_literal +SELECT sum(number + my_literal) +FROM numbers(0) +EXPLAIN SYNTAX (WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0)); +WITH CAST(\'1\', \'Nullable(UInt64)\') AS my_literal +SELECT sum(number) + (my_literal * count()) +FROM numbers(0) +-- { echoOn } +SELECT sum(uint64 + 1 AS i) 
from test_table where i > 0; +20 +SELECT sum(uint64 + 1) AS j from test_table having j > 0; +20 +SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0; +20 +SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +20 +SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +20 +EXPLAIN SYNTAX (SELECT sum(uint64 + 1 AS i) from test_table where i > 0); +SELECT sum(uint64) + (1 * count(uint64)) +FROM test_table +WHERE ((uint64 + 1) AS i) > 0 +EXPLAIN SYNTAX (SELECT sum(uint64 + 1) AS j from test_table having j > 0); +SELECT sum(uint64) + (1 * count(uint64)) +FROM test_table +HAVING (sum(uint64) + (1 * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0); +SELECT sum(uint64) + (1 * count(uint64)) +FROM test_table +WHERE ((uint64 + 1) AS i) > 0 +HAVING (sum(uint64) + (1 * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +SELECT sum(uint64) + ((1 AS n) * count(uint64)) +FROM test_table +WHERE ((uint64 AS m) > 0) AND (n > 0) +HAVING (sum(uint64) + (n * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +SELECT sum(uint64) + ((1 AS n) * count(uint64)) +FROM test_table +WHERE ((uint64 AS m) > 0) AND (n > 0) AND (((m + n) AS i) > 0) +HAVING (sum(uint64) + (n * count(uint64))) > 0 +SELECT sum(1 + uint64 AS i) from test_table where i > 0; +20 +SELECT sum(1 + uint64) AS j from test_table having j > 0; +20 +SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0; +20 +SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +20 +SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +20 +EXPLAIN SYNTAX (SELECT sum(1 + uint64 AS i) from test_table where i > 0); +SELECT (1 * count(uint64)) + sum(uint64) +FROM test_table +WHERE ((1 + uint64) AS i) > 0 +EXPLAIN SYNTAX (SELECT sum(1 + uint64) AS j from test_table having j > 0); +SELECT (1 * count(uint64)) + sum(uint64) +FROM test_table +HAVING ((1 * count(uint64)) + sum(uint64)) > 0 +EXPLAIN SYNTAX (SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0); +SELECT (1 * count(uint64)) + sum(uint64) +FROM test_table +WHERE ((1 + uint64) AS i) > 0 +HAVING ((1 * count(uint64)) + sum(uint64)) > 0 +EXPLAIN SYNTAX (SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +SELECT ((1 AS m) * count(uint64)) + sum(uint64) +FROM test_table +WHERE (m > 0) AND ((uint64 AS n) > 0) +HAVING ((m * count(uint64)) + sum(uint64)) > 0 +EXPLAIN SYNTAX (SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +SELECT ((1 AS m) * count(uint64)) + sum(uint64) +FROM test_table +WHERE (m > 0) AND ((uint64 AS n) > 0) AND (((m + n) AS i) > 0) +HAVING ((m * count(uint64)) + sum(uint64)) > 0 +SELECT sum(uint64 - 1 AS i) from test_table where i > 0; +10 +SELECT sum(uint64 - 1) AS j from test_table having j > 0; +10 +SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0; +10 +SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +10 +SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +10 +EXPLAIN SYNTAX (SELECT sum(uint64 - 1 AS i) from test_table where 
i > 0); +SELECT sum(uint64) - (1 * count(uint64)) +FROM test_table +WHERE ((uint64 - 1) AS i) > 0 +EXPLAIN SYNTAX (SELECT sum(uint64 - 1) AS j from test_table having j > 0); +SELECT sum(uint64) - (1 * count(uint64)) +FROM test_table +HAVING (sum(uint64) - (1 * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0); +SELECT sum(uint64) - (1 * count(uint64)) +FROM test_table +WHERE ((uint64 - 1) AS i) > 0 +HAVING (sum(uint64) - (1 * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +SELECT sum(uint64) - ((1 AS n) * count(uint64)) +FROM test_table +WHERE ((uint64 AS m) > 0) AND (n > 0) +HAVING (sum(uint64) - (n * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +SELECT sum(uint64) - ((1 AS n) * count(uint64)) +FROM test_table +WHERE ((uint64 AS m) > 0) AND (n > 0) AND (((m - n) AS i) > 0) +HAVING (sum(uint64) - (n * count(uint64))) > 0 +SELECT sum(1 - uint64 AS i) from test_table; +-10 +SELECT sum(1 - uint64) AS j from test_table; +-10 +SELECT sum(1 - uint64 AS i) j from test_table; +-10 +SELECT sum((1 AS m) - (uint64 AS n)) j from test_table; +-10 +SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table; +-10 +EXPLAIN SYNTAX (SELECT sum(1 - uint64 AS i) from test_table where i > 0); +SELECT (1 * count(uint64)) - sum(uint64) +FROM test_table +WHERE ((1 - uint64) AS i) > 0 +EXPLAIN SYNTAX (SELECT sum(1 - uint64) AS j from test_table having j < 0); +SELECT (1 * count(uint64)) - sum(uint64) +FROM test_table +HAVING ((1 * count(uint64)) - sum(uint64)) < 0 +EXPLAIN SYNTAX (SELECT sum(1 - uint64 AS i) j from test_table where i > 0 having j < 0); +SELECT (1 * count(uint64)) - sum(uint64) +FROM test_table +WHERE ((1 - uint64) AS i) > 0 +HAVING ((1 * count(uint64)) - sum(uint64)) < 0 +EXPLAIN SYNTAX (SELECT sum((1 AS m) - (uint64 AS n)) j from test_table where m > 0 and n > 0 having j < 0); +SELECT ((1 AS m) * count(uint64)) - sum(uint64) +FROM test_table +WHERE (m > 0) AND ((uint64 AS n) > 0) +HAVING ((m * count(uint64)) - sum(uint64)) < 0 +EXPLAIN SYNTAX (SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i < 0 having j < 0); +SELECT ((1 AS m) * count(uint64)) - sum(uint64) +FROM test_table +WHERE (m > 0) AND ((uint64 AS n) > 0) AND (((m - n) AS i) < 0) +HAVING ((m * count(uint64)) - sum(uint64)) < 0 +SELECT sum(uint64 + 2.11) From test_table; +25.549999999999997 +SELECT sum(2.11 + uint64) From test_table; +25.549999999999997 +SELECT sum(uint64 - 2.11) From test_table; +4.450000000000001 +SELECT sum(2.11 - uint64) From test_table; +-4.450000000000001 +SELECT sum(uint64) + 2.11 * count(uint64) From test_table; +25.549999999999997 +SELECT 2.11 * count(uint64) + sum(uint64) From test_table; +25.549999999999997 +SELECT sum(uint64) - 2.11 * count(uint64) From test_table; +4.450000000000001 +SELECT 2.11 * count(uint64) - sum(uint64) From test_table; +-4.450000000000001 +EXPLAIN SYNTAX (SELECT sum(uint64 + 2.11) From test_table); +SELECT sum(uint64) + (2.11 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2.11 + uint64) From test_table); +SELECT (2.11 * count(uint64)) + sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 - 2.11) From test_table); +SELECT sum(uint64) - (2.11 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2.11 - uint64) From test_table); +SELECT (2.11 * count(uint64)) - sum(uint64) 
+FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64) + 2.11 * count(uint64) From test_table); +SELECT sum(uint64) + (2.11 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2.11 * count(uint64) + sum(uint64) From test_table); +SELECT (2.11 * count(uint64)) + sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64) - 2.11 * count(uint64) From test_table); +SELECT sum(uint64) - (2.11 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2.11 * count(uint64) - sum(uint64) From test_table); +SELECT (2.11 * count(uint64)) - sum(uint64) +FROM test_table +SELECT sum(uint64 + 2) From test_table; +25 +SELECT sum(2 + uint64) From test_table; +25 +SELECT sum(uint64 - 2) From test_table; +5 +SELECT sum(2 - uint64) From test_table; +-5 +SELECT sum(uint64) + 2 * count(uint64) From test_table; +25 +SELECT 2 * count(uint64) + sum(uint64) From test_table; +25 +SELECT sum(uint64) - 2 * count(uint64) From test_table; +5 +SELECT 2 * count(uint64) - sum(uint64) From test_table; +-5 +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) From test_table); +SELECT sum(uint64) + (2 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 + uint64) From test_table); +SELECT (2 * count(uint64)) + sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) From test_table); +SELECT sum(uint64) - (2 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - uint64) From test_table); +SELECT (2 * count(uint64)) - sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64) + 2 * count(uint64) From test_table); +SELECT sum(uint64) + (2 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(uint64) + sum(uint64) From test_table); +SELECT (2 * count(uint64)) + sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64) - 2 * count(uint64) From test_table); +SELECT sum(uint64) - (2 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(uint64) - sum(uint64) From test_table); +SELECT (2 * count(uint64)) - sum(uint64) +FROM test_table +SELECT sum(float64 + 2) From test_table; +26.5 +SELECT sum(2 + float64) From test_table; +26.5 +SELECT sum(float64 - 2) From test_table; +6.5 +SELECT sum(2 - float64) From test_table; +-6.5 +SELECT sum(float64) + 2 * count(float64) From test_table; +26.5 +SELECT 2 * count(float64) + sum(float64) From test_table; +26.5 +SELECT sum(float64) - 2 * count(float64) From test_table; +6.5 +SELECT 2 * count(float64) - sum(float64) From test_table; +-6.5 +EXPLAIN SYNTAX (SELECT sum(float64 + 2) From test_table); +SELECT sum(float64) + (2 * count(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 + float64) From test_table); +SELECT (2 * count(float64)) + sum(float64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64 - 2) From test_table); +SELECT sum(float64) - (2 * count(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - float64) From test_table); +SELECT (2 * count(float64)) - sum(float64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64) + 2 * count(float64) From test_table); +SELECT sum(float64) + (2 * count(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(float64) + sum(float64) From test_table); +SELECT (2 * count(float64)) + sum(float64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64) - 2 * count(float64) From test_table); +SELECT sum(float64) - (2 * count(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(float64) - sum(float64) From test_table); +SELECT (2 * count(float64)) - sum(float64) +FROM test_table +SELECT sum(decimal32 + 2) From test_table; +26.65 +SELECT sum(2 + decimal32) 
From test_table; +26.65 +SELECT sum(decimal32 - 2) From test_table; +6.65 +SELECT sum(2 - decimal32) From test_table; +-6.65 +SELECT sum(decimal32) + 2 * count(decimal32) From test_table; +26.65 +SELECT 2 * count(decimal32) + sum(decimal32) From test_table; +26.65 +SELECT sum(decimal32) - 2 * count(decimal32) From test_table; +6.65 +SELECT 2 * count(decimal32) - sum(decimal32) From test_table; +-6.65 +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) From test_table); +SELECT sum(decimal32) + (2 * count(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 + decimal32) From test_table); +SELECT (2 * count(decimal32)) + sum(decimal32) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) From test_table); +SELECT sum(decimal32) - (2 * count(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - decimal32) From test_table); +SELECT (2 * count(decimal32)) - sum(decimal32) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32) + 2 * count(decimal32) From test_table); +SELECT sum(decimal32) + (2 * count(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(decimal32) + sum(decimal32) From test_table); +SELECT (2 * count(decimal32)) + sum(decimal32) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32) - 2 * count(decimal32) From test_table); +SELECT sum(decimal32) - (2 * count(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(decimal32) - sum(decimal32) From test_table); +SELECT (2 * count(decimal32)) - sum(decimal32) +FROM test_table +SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table; +55 +SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table; +-5 +SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table; +5 +SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table; +5 +SELECT sum(2 - uint64) - sum(3 - uint64) From test_table; +-5 +SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table; +55 +SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table; +-5 +SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table; +5 +SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table; +5 +SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table; +-5 +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table); +SELECT (sum(uint64) + (2 * count(uint64))) + (sum(uint64) + (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table); +SELECT (sum(uint64) + (2 * count(uint64))) - (sum(uint64) + (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table); +SELECT (sum(uint64) - (2 * count(uint64))) + (sum(uint64) - (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table); +SELECT (sum(uint64) - (2 * count(uint64))) - (sum(uint64) - (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - uint64) - sum(3 - uint64) From test_table); +SELECT ((2 * count(uint64)) - sum(uint64)) - ((3 * count(uint64)) - sum(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table); +SELECT (sum(uint64) + (2 * count(uint64))) + (sum(uint64) + (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table); +SELECT (sum(uint64) + (2 * count(uint64))) - (sum(uint64) + 
(3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table); +SELECT (sum(uint64) - (2 * count(uint64))) + (sum(uint64) - (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table); +SELECT (sum(uint64) - (2 * count(uint64))) - (sum(uint64) - (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table); +SELECT ((2 * count(uint64)) - sum(uint64)) + ((3 * count(uint64)) - sum(uint64)) +FROM test_table +SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; +58 +SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; +-5 +SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; +8 +SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; +5 +SELECT sum(2 - float64) - sum(3 - float64) From test_table; +-5 +SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; +58 +SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; +-5 +SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; +8 +SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; +5 +SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; +-8 +EXPLAIN SYNTAX (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); +SELECT (sum(float64) + (2 * count(float64))) + (sum(float64) + (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64 + 2) - sum(float64 + 3) From test_table); +SELECT (sum(float64) + (2 * count(float64))) - (sum(float64) + (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64 - 2) + sum(float64 - 3) From test_table); +SELECT (sum(float64) - (2 * count(float64))) + (sum(float64) - (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64 - 2) - sum(float64 - 3) From test_table); +SELECT (sum(float64) - (2 * count(float64))) - (sum(float64) - (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - float64) - sum(3 - float64) From test_table); +SELECT ((2 * count(float64)) - sum(float64)) - ((3 * count(float64)) - sum(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table); +SELECT (sum(float64) + (2 * count(float64))) + (sum(float64) + (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table); +SELECT (sum(float64) + (2 * count(float64))) - (sum(float64) + (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table); +SELECT (sum(float64) - (2 * count(float64))) + (sum(float64) - (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table); +SELECT (sum(float64) - (2 * count(float64))) - (sum(float64) - (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table); +SELECT ((2 * count(float64)) - sum(float64)) + ((3 * count(float64)) - sum(float64)) +FROM test_table +SELECT sum(decimal32 + 2) + sum(decimal32 + 3) 
From test_table; +58.3 +SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table; +-5 +SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table; +8.3 +SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table; +5 +SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table; +-5 +SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table; +58.3 +SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table; +-5 +SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table; +8.3 +SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table; +5 +SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table; +-8.3 +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table); +SELECT (sum(decimal32) + (2 * count(decimal32))) + (sum(decimal32) + (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table); +SELECT (sum(decimal32) + (2 * count(decimal32))) - (sum(decimal32) + (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table); +SELECT (sum(decimal32) - (2 * count(decimal32))) + (sum(decimal32) - (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table); +SELECT (sum(decimal32) - (2 * count(decimal32))) - (sum(decimal32) - (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table); +SELECT ((2 * count(decimal32)) - sum(decimal32)) - ((3 * count(decimal32)) - sum(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table); +SELECT (sum(decimal32) + (2 * count(decimal32))) + (sum(decimal32) + (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table); +SELECT (sum(decimal32) + (2 * count(decimal32))) - (sum(decimal32) + (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table); +SELECT (sum(decimal32) - (2 * count(decimal32))) + (sum(decimal32) - (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table); +SELECT (sum(decimal32) - (2 * count(decimal32))) - (sum(decimal32) - (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table); +SELECT ((2 * count(decimal32)) - sum(decimal32)) + ((3 * count(decimal32)) - sum(decimal32)) +FROM test_table diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql new file mode 100644 index 00000000000..b29407d7208 --- /dev/null +++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql @@ -0,0 +1,209 @@ +-- { echoOn } +Select sum(number + 1) from numbers(10); +Select sum(1 + number) from numbers(10); +Select sum(number - 1) from numbers(10); +Select sum(1 - number) from numbers(10); +EXPLAIN SYNTAX (Select sum(number + 1) from numbers(10)); 
+EXPLAIN SYNTAX (Select sum(1 + number) from numbers(10)); +EXPLAIN SYNTAX (Select sum(number - 1) from numbers(10)); +EXPLAIN SYNTAX (Select sum(1 - number) from numbers(10)); + +WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0); +WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0); +EXPLAIN SYNTAX (WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0)); +EXPLAIN SYNTAX (WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0)); +-- { echoOff } + +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table +( + uint64 UInt64, + float64 Float64, + decimal32 Decimal32(5), +) ENGINE=MergeTree ORDER BY uint64; + +INSERT INTO test_table VALUES (1, 1.1, 1.11); +INSERT INTO test_table VALUES (2, 2.2, 2.22); +INSERT INTO test_table VALUES (3, 3.3, 3.33); +INSERT INTO test_table VALUES (4, 4.4, 4.44); +INSERT INTO test_table VALUES (5, 5.5, 5.55); + +-- { echoOn } +SELECT sum(uint64 + 1 AS i) from test_table where i > 0; +SELECT sum(uint64 + 1) AS j from test_table having j > 0; +SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN SYNTAX (SELECT sum(uint64 + 1 AS i) from test_table where i > 0); +EXPLAIN SYNTAX (SELECT sum(uint64 + 1) AS j from test_table having j > 0); +EXPLAIN SYNTAX (SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(1 + uint64 AS i) from test_table where i > 0; +SELECT sum(1 + uint64) AS j from test_table having j > 0; +SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN SYNTAX (SELECT sum(1 + uint64 AS i) from test_table where i > 0); +EXPLAIN SYNTAX (SELECT sum(1 + uint64) AS j from test_table having j > 0); +EXPLAIN SYNTAX (SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(uint64 - 1 AS i) from test_table where i > 0; +SELECT sum(uint64 - 1) AS j from test_table having j > 0; +SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN SYNTAX (SELECT sum(uint64 - 1 AS i) from test_table where i > 0); +EXPLAIN SYNTAX (SELECT sum(uint64 - 1) AS j from test_table having j > 0); +EXPLAIN SYNTAX (SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum(((uint64 AS m) - (1 AS 
n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(1 - uint64 AS i) from test_table; +SELECT sum(1 - uint64) AS j from test_table; +SELECT sum(1 - uint64 AS i) j from test_table; +SELECT sum((1 AS m) - (uint64 AS n)) j from test_table; +SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table; +EXPLAIN SYNTAX (SELECT sum(1 - uint64 AS i) from test_table where i > 0); +EXPLAIN SYNTAX (SELECT sum(1 - uint64) AS j from test_table having j < 0); +EXPLAIN SYNTAX (SELECT sum(1 - uint64 AS i) j from test_table where i > 0 having j < 0); +EXPLAIN SYNTAX (SELECT sum((1 AS m) - (uint64 AS n)) j from test_table where m > 0 and n > 0 having j < 0); +EXPLAIN SYNTAX (SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i < 0 having j < 0); + +SELECT sum(uint64 + 2.11) From test_table; +SELECT sum(2.11 + uint64) From test_table; +SELECT sum(uint64 - 2.11) From test_table; +SELECT sum(2.11 - uint64) From test_table; +SELECT sum(uint64) + 2.11 * count(uint64) From test_table; +SELECT 2.11 * count(uint64) + sum(uint64) From test_table; +SELECT sum(uint64) - 2.11 * count(uint64) From test_table; +SELECT 2.11 * count(uint64) - sum(uint64) From test_table; +EXPLAIN SYNTAX (SELECT sum(uint64 + 2.11) From test_table); +EXPLAIN SYNTAX (SELECT sum(2.11 + uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 - 2.11) From test_table); +EXPLAIN SYNTAX (SELECT sum(2.11 - uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64) + 2.11 * count(uint64) From test_table); +EXPLAIN SYNTAX (SELECT 2.11 * count(uint64) + sum(uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64) - 2.11 * count(uint64) From test_table); +EXPLAIN SYNTAX (SELECT 2.11 * count(uint64) - sum(uint64) From test_table); + +SELECT sum(uint64 + 2) From test_table; +SELECT sum(2 + uint64) From test_table; +SELECT sum(uint64 - 2) From test_table; +SELECT sum(2 - uint64) From test_table; +SELECT sum(uint64) + 2 * count(uint64) From test_table; +SELECT 2 * count(uint64) + sum(uint64) From test_table; +SELECT sum(uint64) - 2 * count(uint64) From test_table; +SELECT 2 * count(uint64) - sum(uint64) From test_table; +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 + uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64) + 2 * count(uint64) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(uint64) + sum(uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64) - 2 * count(uint64) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(uint64) - sum(uint64) From test_table); + +SELECT sum(float64 + 2) From test_table; +SELECT sum(2 + float64) From test_table; +SELECT sum(float64 - 2) From test_table; +SELECT sum(2 - float64) From test_table; +SELECT sum(float64) + 2 * count(float64) From test_table; +SELECT 2 * count(float64) + sum(float64) From test_table; +SELECT sum(float64) - 2 * count(float64) From test_table; +SELECT 2 * count(float64) - sum(float64) From test_table; +EXPLAIN SYNTAX (SELECT sum(float64 + 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 + float64) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64 - 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - float64) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64) + 2 * count(float64) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(float64) + sum(float64) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64) - 2 * 
count(float64) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(float64) - sum(float64) From test_table); + +SELECT sum(decimal32 + 2) From test_table; +SELECT sum(2 + decimal32) From test_table; +SELECT sum(decimal32 - 2) From test_table; +SELECT sum(2 - decimal32) From test_table; +SELECT sum(decimal32) + 2 * count(decimal32) From test_table; +SELECT 2 * count(decimal32) + sum(decimal32) From test_table; +SELECT sum(decimal32) - 2 * count(decimal32) From test_table; +SELECT 2 * count(decimal32) - sum(decimal32) From test_table; +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 + decimal32) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - decimal32) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32) + 2 * count(decimal32) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(decimal32) + sum(decimal32) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32) - 2 * count(decimal32) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(decimal32) - sum(decimal32) From test_table); + +SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table; +SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table; +SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table; +SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table; +SELECT sum(2 - uint64) - sum(3 - uint64) From test_table; +SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table; +SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table; +SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table; +SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table; +SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table; +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - uint64) - sum(3 - uint64) From test_table); +EXPLAIN SYNTAX (SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table); +EXPLAIN SYNTAX (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table); + +SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; +SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; +SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; +SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; +SELECT sum(2 - float64) - sum(3 - float64) From test_table; +SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; +SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; +SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; +SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; +SELECT (2 * count(float64) - 
sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; +EXPLAIN SYNTAX (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64 + 2) - sum(float64 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64 - 2) + sum(float64 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64 - 2) - sum(float64 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - float64) - sum(3 - float64) From test_table); +EXPLAIN SYNTAX (SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table); +EXPLAIN SYNTAX (SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table); + +SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table; +SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table; +SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table; +SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table; +SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table; +SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table; +SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table; +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table); +EXPLAIN SYNTAX (SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table); +EXPLAIN SYNTAX (SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table); +-- { echoOff } + +DROP TABLE IF EXISTS test_table; diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference new file mode 100644 index 00000000000..802d920aaef --- /dev/null +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference @@ -0,0 +1,3324 @@ +-- { echoOn } +Select sum(number + 1) from numbers(10); +55 +Select sum(1 + number) from numbers(10); +55 +Select sum(number - 1) from numbers(10); +35 +Select sum(1 - number) from numbers(10); +-35 +EXPLAIN QUERY 
TREE (Select sum(number + 1) from numbers(10)); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(number, 1)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 +EXPLAIN QUERY TREE (Select sum(1 + number) from numbers(10)); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(1, number)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE_FUNCTION id: 10, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 +EXPLAIN QUERY TREE (Select sum(number - 1) from numbers(10)); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(number, 1)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 +EXPLAIN QUERY TREE (Select sum(1 - number) from numbers(10)); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(1, number)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + 
CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE_FUNCTION id: 10, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 +WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0); +\N +WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0); +0 +EXPLAIN QUERY TREE (WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0)); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(number, my_literal)) Nullable(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: sum, function_type: aggregate, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_1, constant_value_type: Nullable(UInt64) + EXPRESSION + FUNCTION id: 9, alias: my_literal, function_name: CAST, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: \'1\', constant_value_type: String + CONSTANT id: 12, constant_value: \'Nullable(UInt64)\', constant_value_type: String + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0)); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(number), multiply(my_literal, count())) Nullable(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: Nullable(UInt64) + EXPRESSION + FUNCTION id: 11, alias: my_literal, function_name: CAST, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: \'1\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Nullable(UInt64)\', constant_value_type: String + FUNCTION id: 15, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 16, nodes: 1 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +-- { echoOn } +SELECT sum(uint64 + 1 AS i) from test_table where i > 0; +20 +SELECT sum(uint64 + 1) AS j from test_table having j > 0; +20 +SELECT sum(uint64 + 1 AS i) j from 
test_table where i > 0 having j > 0; +20 +SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +20 +SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +20 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1 AS i) from test_table where i > 0); +QUERY id: 0 + PROJECTION COLUMNS + sum(i) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1) AS j from test_table having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + HAVING + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 22, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 23, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1 AS i) j from 
test_table where i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 18, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 24, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 26, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 28, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 
+ CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + FUNCTION id: 26, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 28, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 30, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 31, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 32, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 3 + FUNCTION id: 15, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 26, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 27, 
function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 28, nodes: 2 + FUNCTION id: 29, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 30, nodes: 2 + FUNCTION id: 31, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 32, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 33, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 34, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 35, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 36, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 37, constant_value: UInt64_0, constant_value_type: UInt8 +SELECT sum(1 + uint64 AS i) from test_table where i > 0; +20 +SELECT sum(1 + uint64) AS j from test_table having j > 0; +20 +SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0; +20 +SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +20 +SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +20 +EXPLAIN QUERY TREE (SELECT sum(1 + uint64 AS i) from test_table where i > 0); +QUERY id: 0 + PROJECTION COLUMNS + sum(i) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(1 + uint64) AS j from test_table having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 
10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + HAVING + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 19, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 21, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 22, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 23, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 18, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 24, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 26, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 28, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, 
function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 16, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + FUNCTION id: 26, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 28, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 30, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 31, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 32, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 3 + CONSTANT id: 15, constant_value: UInt64_1, 
constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 16, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 26, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 27, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 28, nodes: 2 + FUNCTION id: 29, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 30, nodes: 2 + FUNCTION id: 31, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 32, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 33, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 34, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 35, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 36, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 37, constant_value: UInt64_0, constant_value_type: UInt8 +SELECT sum(uint64 - 1 AS i) from test_table where i > 0; +10 +SELECT sum(uint64 - 1) AS j from test_table having j > 0; +10 +SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0; +10 +SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +10 +SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +10 +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1 AS i) from test_table where i > 0); +QUERY id: 0 + PROJECTION COLUMNS + sum(i) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 
6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1) AS j from test_table having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + HAVING + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 22, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 23, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 18, function_name: greater, function_type: ordinary, result_type: UInt8 + 
ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 24, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 26, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 28, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 25, nodes: 2 + FUNCTION id: 26, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 28, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 30, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 31, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 32, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 
0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 3 + FUNCTION id: 15, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 25, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 26, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 27, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 28, nodes: 2 + FUNCTION id: 29, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 30, nodes: 2 + FUNCTION id: 31, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 32, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 33, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 34, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 35, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 36, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 37, constant_value: UInt64_0, constant_value_type: UInt8 +SELECT sum(1 - uint64 AS i) from test_table; +-10 +SELECT sum(1 - uint64) AS j from test_table; +-10 +SELECT sum(1 - uint64 AS i) j from test_table; +-10 +SELECT sum((1 AS m) - (uint64 AS n)) j from test_table; +-10 +SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table; +-10 +EXPLAIN QUERY TREE (SELECT sum(1 - uint64 AS i) from test_table where i > 0); +QUERY id: 0 + PROJECTION COLUMNS + sum(i) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 
4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(1 - uint64) AS j from test_table having j < 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + HAVING + FUNCTION id: 13, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 19, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 21, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 22, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 23, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(1 - uint64 AS i) j from test_table where i > 0 having j < 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 
1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 18, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 24, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 26, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 28, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum((1 AS m) - (uint64 AS n)) j from test_table where m > 0 and n > 0 having j < 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 16, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 22, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, 
nodes: 2 + FUNCTION id: 24, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 25, nodes: 2 + FUNCTION id: 26, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 28, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 30, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 31, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 32, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i < 0 having j < 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 16, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 22, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 25, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 26, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 27, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 28, nodes: 2 + FUNCTION id: 29, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 30, nodes: 2 + FUNCTION id: 31, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 32, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 33, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 34, nodes: 1 + COLUMN id: 
9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 35, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 36, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 37, constant_value: UInt64_0, constant_value_type: UInt8 +SELECT sum(uint64 + 2.11) From test_table; +25.549999999999997 +SELECT sum(2.11 + uint64) From test_table; +25.549999999999997 +SELECT sum(uint64 - 2.11) From test_table; +4.450000000000001 +SELECT sum(2.11 - uint64) From test_table; +-4.450000000000001 +SELECT sum(uint64) + 2.11 * count(uint64) From test_table; +25.549999999999997 +SELECT 2.11 * count(uint64) + sum(uint64) From test_table; +25.549999999999997 +SELECT sum(uint64) - 2.11 * count(uint64) From test_table; +4.450000000000001 +SELECT 2.11 * count(uint64) - sum(uint64) From test_table; +-4.450000000000001 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2.11) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(uint64, 2.11)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2.11 + uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(2.11, uint64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2.11) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(uint64, 2.11)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 11, function_name: count, function_type: aggregate, 
result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2.11 - uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(2.11, uint64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64) + 2.11 * count(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(uint64), multiply(2.11, count(uint64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2.11 * count(uint64) + sum(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(multiply(2.11, count(uint64)), sum(uint64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64) - 2.11 * count(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(uint64), multiply(2.11, count(uint64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: 
UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2.11 * count(uint64) - sum(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(multiply(2.11, count(uint64)), sum(uint64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +SELECT sum(uint64 + 2) From test_table; +25 +SELECT sum(2 + uint64) From test_table; +25 +SELECT sum(uint64 - 2) From test_table; +5 +SELECT sum(2 - uint64) From test_table; +-5 +SELECT sum(uint64) + 2 * count(uint64) From test_table; +25 +SELECT 2 * count(uint64) + sum(uint64) From test_table; +25 +SELECT sum(uint64) - 2 * count(uint64) From test_table; +5 +SELECT 2 * count(uint64) - sum(uint64) From test_table; +-5 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(uint64, 2)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 + uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(2, uint64)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, 
column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(uint64, 2)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(2, uint64)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64) + 2 * count(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(uint64), multiply(2, count(uint64))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(uint64) + sum(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(multiply(2, count(uint64)), sum(uint64)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: 
ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64) - 2 * count(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(uint64), multiply(2, count(uint64))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(uint64) - sum(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(multiply(2, count(uint64)), sum(uint64)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +SELECT sum(float64 + 2) From test_table; +26.5 +SELECT sum(2 + float64) From test_table; +26.5 +SELECT sum(float64 - 2) From test_table; +6.5 +SELECT sum(2 - float64) From test_table; +-6.5 +SELECT sum(float64) + 2 * count(float64) From test_table; +26.5 +SELECT 2 * count(float64) + sum(float64) From test_table; +26.5 +SELECT sum(float64) - 2 * count(float64) From test_table; +6.5 +SELECT 2 * count(float64) - sum(float64) From test_table; +-6.5 +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(float64, 2)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, 
result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 + float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(2, float64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(float64, 2)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(2, float64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64) + 2 * count(float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(float64), multiply(2, count(float64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 
3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(float64) + sum(float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(multiply(2, count(float64)), sum(float64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64) - 2 * count(float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(float64), multiply(2, count(float64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(float64) - sum(float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(multiply(2, count(float64)), sum(float64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, 
source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +SELECT sum(decimal32 + 2) From test_table; +26.65 +SELECT sum(2 + decimal32) From test_table; +26.65 +SELECT sum(decimal32 - 2) From test_table; +6.65 +SELECT sum(2 - decimal32) From test_table; +-6.65 +SELECT sum(decimal32) + 2 * count(decimal32) From test_table; +26.65 +SELECT 2 * count(decimal32) + sum(decimal32) From test_table; +26.65 +SELECT sum(decimal32) - 2 * count(decimal32) From test_table; +6.65 +SELECT 2 * count(decimal32) - sum(decimal32) From test_table; +-6.65 +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(decimal32, 2)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 + decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(2, decimal32)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(decimal32, 2)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE 
(SELECT sum(2 - decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(2, decimal32)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32) + 2 * count(decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(decimal32), multiply(2, count(decimal32))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(decimal32) + sum(decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(multiply(2, count(decimal32)), sum(decimal32)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32) - 2 * count(decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(decimal32), multiply(2, count(decimal32))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: decimal32, 
result_type: Decimal(9, 5), source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(decimal32) - sum(decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(multiply(2, count(decimal32)), sum(decimal32)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table; +55 +SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table; +-5 +SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table; +5 +SELECT sum(2 - uint64) - sum(3 - uint64) From test_table; +-5 +SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table; +55 +SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table; +-5 +SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table; +5 +SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table; +5 +SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table; +-5 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(plus(uint64, 2)), sum(plus(uint64, 3))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, 
nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(plus(uint64, 2)), sum(plus(uint64, 3))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(minus(uint64, 2)), sum(minus(uint64, 3))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: 
aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(uint64, 2)), sum(minus(uint64, 3))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - uint64) - sum(3 - uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(2, uint64)), sum(minus(3, uint64))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, 
nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(plus(sum(uint64), multiply(2, count(uint64))), plus(sum(uint64), multiply(3, count(uint64)))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(plus(sum(uint64), multiply(2, count(uint64))), plus(sum(uint64), multiply(3, count(uint64)))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, 
result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(sum(uint64), multiply(2, count(uint64))), minus(sum(uint64), multiply(3, count(uint64)))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(minus(sum(uint64), multiply(2, count(uint64))), minus(sum(uint64), multiply(3, count(uint64)))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, 
function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(multiply(2, count(uint64)), sum(uint64)), minus(multiply(3, count(uint64)), sum(uint64))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; +58 +SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; +-5 +SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; +8 +SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; +5 +SELECT sum(2 - float64) - sum(3 - float64) From test_table; +-5 +SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; +58 +SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * 
count(float64)) From test_table; +-5 +SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; +8 +SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; +5 +SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; +-8 +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(plus(float64, 2)), sum(plus(float64, 3))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) - sum(float64 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(plus(float64, 2)), sum(plus(float64, 3))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, 
function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) + sum(float64 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(minus(float64, 2)), sum(minus(float64, 3))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) - sum(float64 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(float64, 2)), sum(minus(float64, 3))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 
18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - float64) - sum(3 - float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(2, float64)), sum(minus(3, float64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(plus(sum(float64), multiply(2, count(float64))), plus(sum(float64), multiply(3, count(float64)))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: 
plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(plus(sum(float64), multiply(2, count(float64))), plus(sum(float64), multiply(3, count(float64)))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(sum(float64), multiply(2, count(float64))), minus(sum(float64), multiply(3, count(float64)))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, 
constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(minus(sum(float64), multiply(2, count(float64))), minus(sum(float64), multiply(3, count(float64)))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(multiply(2, count(float64)), sum(float64)), minus(multiply(3, count(float64)), sum(float64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, 
function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table; +58.3 +SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table; +-5 +SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table; +8.3 +SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table; +5 +SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table; +-5 +SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table; +58.3 +SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table; +-5 +SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table; +8.3 +SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table; +5 +SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table; +-8.3 +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(plus(decimal32, 2)), sum(plus(decimal32, 3))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) 
+ ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(plus(decimal32, 2)), sum(plus(decimal32, 3))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(minus(decimal32, 2)), sum(minus(decimal32, 3))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, 
result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(decimal32, 2)), sum(minus(decimal32, 3))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(2, decimal32)), sum(minus(3, decimal32))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST 
id: 10, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(plus(sum(decimal32), multiply(2, count(decimal32))), plus(sum(decimal32), multiply(3, count(decimal32)))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(plus(sum(decimal32), multiply(2, count(decimal32))), plus(sum(decimal32), multiply(3, count(decimal32)))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 
5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(sum(decimal32), multiply(2, count(decimal32))), minus(sum(decimal32), multiply(3, count(decimal32)))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, 
alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(minus(sum(decimal32), multiply(2, count(decimal32))), minus(sum(decimal32), multiply(3, count(decimal32)))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(multiply(2, count(decimal32)), sum(decimal32)), minus(multiply(3, count(decimal32)), sum(decimal32))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, 
constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql new file mode 100644 index 00000000000..43dad8eb8e0 --- /dev/null +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql @@ -0,0 +1,210 @@ +SET allow_experimental_analyzer=1; + +-- { echoOn } +Select sum(number + 1) from numbers(10); +Select sum(1 + number) from numbers(10); +Select sum(number - 1) from numbers(10); +Select sum(1 - number) from numbers(10); +EXPLAIN QUERY TREE (Select sum(number + 1) from numbers(10)); +EXPLAIN QUERY TREE (Select sum(1 + number) from numbers(10)); +EXPLAIN QUERY TREE (Select sum(number - 1) from numbers(10)); +EXPLAIN QUERY TREE (Select sum(1 - number) from numbers(10)); + +WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0); +WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0); +EXPLAIN QUERY TREE (WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0)); +EXPLAIN QUERY TREE (WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0)); +-- { echoOff } + +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table +( + uint64 UInt64, + float64 Float64, + decimal32 Decimal32(5), +) ENGINE=MergeTree ORDER BY uint64; + +INSERT INTO test_table VALUES (1, 1.1, 1.11); +INSERT INTO test_table VALUES (2, 2.2, 2.22); +INSERT INTO test_table VALUES (3, 3.3, 3.33); +INSERT INTO test_table VALUES (4, 4.4, 4.44); +INSERT INTO test_table VALUES (5, 5.5, 5.55); + +-- { echoOn } +SELECT sum(uint64 + 1 AS i) from test_table where i > 0; +SELECT sum(uint64 + 1) AS j from test_table having j > 0; +SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1 AS i) from test_table where i > 0); +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1) AS j from test_table having j > 0); +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(1 + uint64 AS i) from test_table where i > 0; +SELECT sum(1 + uint64) AS j from test_table having j > 0; +SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN QUERY TREE (SELECT sum(1 + uint64 AS i) from test_table where i > 0); +EXPLAIN 
QUERY TREE (SELECT sum(1 + uint64) AS j from test_table having j > 0); +EXPLAIN QUERY TREE (SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(uint64 - 1 AS i) from test_table where i > 0; +SELECT sum(uint64 - 1) AS j from test_table having j > 0; +SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1 AS i) from test_table where i > 0); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1) AS j from test_table having j > 0); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(1 - uint64 AS i) from test_table; +SELECT sum(1 - uint64) AS j from test_table; +SELECT sum(1 - uint64 AS i) j from test_table; +SELECT sum((1 AS m) - (uint64 AS n)) j from test_table; +SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table; +EXPLAIN QUERY TREE (SELECT sum(1 - uint64 AS i) from test_table where i > 0); +EXPLAIN QUERY TREE (SELECT sum(1 - uint64) AS j from test_table having j < 0); +EXPLAIN QUERY TREE (SELECT sum(1 - uint64 AS i) j from test_table where i > 0 having j < 0); +EXPLAIN QUERY TREE (SELECT sum((1 AS m) - (uint64 AS n)) j from test_table where m > 0 and n > 0 having j < 0); +EXPLAIN QUERY TREE (SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i < 0 having j < 0); + +SELECT sum(uint64 + 2.11) From test_table; +SELECT sum(2.11 + uint64) From test_table; +SELECT sum(uint64 - 2.11) From test_table; +SELECT sum(2.11 - uint64) From test_table; +SELECT sum(uint64) + 2.11 * count(uint64) From test_table; +SELECT 2.11 * count(uint64) + sum(uint64) From test_table; +SELECT sum(uint64) - 2.11 * count(uint64) From test_table; +SELECT 2.11 * count(uint64) - sum(uint64) From test_table; +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2.11) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2.11 + uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2.11) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2.11 - uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64) + 2.11 * count(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT 2.11 * count(uint64) + sum(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64) - 2.11 * count(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT 2.11 * count(uint64) - sum(uint64) From test_table); + +SELECT sum(uint64 + 2) From test_table; +SELECT sum(2 + uint64) From test_table; +SELECT sum(uint64 - 2) From test_table; +SELECT sum(2 - uint64) From test_table; +SELECT sum(uint64) + 2 * count(uint64) From test_table; +SELECT 2 * count(uint64) + sum(uint64) From test_table; +SELECT sum(uint64) - 2 * count(uint64) From test_table; +SELECT 2 * count(uint64) - sum(uint64) From test_table; +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 + 
uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64) + 2 * count(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(uint64) + sum(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64) - 2 * count(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(uint64) - sum(uint64) From test_table); + +SELECT sum(float64 + 2) From test_table; +SELECT sum(2 + float64) From test_table; +SELECT sum(float64 - 2) From test_table; +SELECT sum(2 - float64) From test_table; +SELECT sum(float64) + 2 * count(float64) From test_table; +SELECT 2 * count(float64) + sum(float64) From test_table; +SELECT sum(float64) - 2 * count(float64) From test_table; +SELECT 2 * count(float64) - sum(float64) From test_table; +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 + float64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - float64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64) + 2 * count(float64) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(float64) + sum(float64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64) - 2 * count(float64) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(float64) - sum(float64) From test_table); + +SELECT sum(decimal32 + 2) From test_table; +SELECT sum(2 + decimal32) From test_table; +SELECT sum(decimal32 - 2) From test_table; +SELECT sum(2 - decimal32) From test_table; +SELECT sum(decimal32) + 2 * count(decimal32) From test_table; +SELECT 2 * count(decimal32) + sum(decimal32) From test_table; +SELECT sum(decimal32) - 2 * count(decimal32) From test_table; +SELECT 2 * count(decimal32) - sum(decimal32) From test_table; +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 + decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32) + 2 * count(decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(decimal32) + sum(decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32) - 2 * count(decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(decimal32) - sum(decimal32) From test_table); + +SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table; +SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table; +SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table; +SELECT sum(2 - uint64) - sum(3 - uint64) From test_table; +SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table; +SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table; +SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table; +SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table; +SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table; +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - uint64) - sum(3 - 
uint64) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table); +EXPLAIN QUERY TREE (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table); + +SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; +SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; +SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; +SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; +SELECT sum(2 - float64) - sum(3 - float64) From test_table; +SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; +SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; +SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; +SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; +SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) - sum(float64 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) + sum(float64 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) - sum(float64 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - float64) - sum(3 - float64) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table); +EXPLAIN QUERY TREE (SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table); + +SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table; +SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table; +SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table; +SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table; +SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table; +SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table; +SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table; +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table); +EXPLAIN QUERY 
TREE (SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table); +EXPLAIN QUERY TREE (SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table); +-- { echoOff } + +DROP TABLE IF EXISTS test_table; diff --git a/tests/queries/0_stateless/02933_paste_join.reference b/tests/queries/0_stateless/02933_paste_join.reference index 5ff13917957..81a8ac22da4 100644 --- a/tests/queries/0_stateless/02933_paste_join.reference +++ b/tests/queries/0_stateless/02933_paste_join.reference @@ -82,3 +82,26 @@ UInt64 7 2 8 1 9 0 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +1 2 3 +0 0 +1 1 +0 +1 diff --git a/tests/queries/0_stateless/02933_paste_join.sql b/tests/queries/0_stateless/02933_paste_join.sql index b103bf72160..604078d1c3a 100644 --- a/tests/queries/0_stateless/02933_paste_join.sql +++ b/tests/queries/0_stateless/02933_paste_join.sql @@ -1,6 +1,6 @@ select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10)) t2; select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10) order by a desc) t2; -create table if not exists test (num UInt64) engine=Memory; +create table if not exists test (number UInt64) engine=Memory; insert into test select number from numbers(6); insert into test select number from numbers(5); SELECT * FROM (SELECT 1) t1 PASTE JOIN (SELECT 2) SETTINGS joined_subquery_requires_alias=0; @@ -35,3 +35,21 @@ SET max_threads = 2; select * from (SELECT number as a FROM numbers_mt(10)) t1 PASTE JOIN (select number as a from numbers(10) ORDER BY a DESC) t2 SETTINGS max_block_size=10; select * from (SELECT number as a FROM numbers(10)) t1 ANY PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } select * from (SELECT number as a FROM numbers(10)) t1 ALL PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } + +TRUNCATE TABLE test; +INSERT INTO test SELECT number from numbers(6); +SELECT * FROM (SELECT number FROM test) PASTE JOIN (SELECT number FROM numbers(6) ORDER BY number) SETTINGS joined_subquery_requires_alias = 0; +SELECT * FROM (SELECT number FROM test PASTE JOIN (Select number FROM numbers(7))) PASTE JOIN (SELECT number FROM numbers(6) PASTE JOIN (SELECT number FROM test)) SETTINGS joined_subquery_requires_alias = 0; +SELECT * FROM (SELECT number FROM test PASTE JOIN (SELECT number FROM test PASTE JOIN (Select number FROM numbers(7)))) PASTE JOIN (SELECT number FROM numbers(6) PASTE JOIN (SELECT number FROM test)) SETTINGS joined_subquery_requires_alias = 0; +SELECT * FROM (SELECT 1 AS a) PASTE JOIN (SELECT 2 AS b) PASTE JOIN (SELECT 3 AS c) SETTINGS allow_experimental_analyzer = 1; +SELECT * FROM (SELECT 1 AS a) PASTE JOIN (SELECT 2 AS b) PASTE JOIN (SELECT 3 AS a) SETTINGS allow_experimental_analyzer = 1; -- { serverError AMBIGUOUS_COLUMN_NAME } + +SET 
allow_experimental_analyzer = 1; +CREATE TABLE test1 (a Int32) engine=MergeTree order by a; +INSERT INTO test1 SELECT * FROM numbers(2); +CREATE TABLE test2 (a Int32) engine=MergeTree order by a; +INSERT INTO test2 SELECT * FROM numbers(2); +SELECT * FROM test1 PASTE JOIN (SELECT * FROM test2); +SELECT a `test2.a` FROM test1 PASTE JOIN test2; +SELECT * FROM test1 `test2.a` PASTE JOIN test2 `test2.a`; -- { serverError MULTIPLE_EXPRESSIONS_FOR_ALIAS } +SELECT * FROM test1 PASTE JOIN (SELECT number AS a FROM numbers(2) ORDER BY number DESC); -- { serverError AMBIGUOUS_COLUMN_NAME } diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.reference b/tests/queries/0_stateless/02968_file_log_multiple_read.reference new file mode 100644 index 00000000000..40afb2d64f9 --- /dev/null +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.reference @@ -0,0 +1,30 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.sh b/tests/queries/0_stateless/02968_file_log_multiple_read.sh new file mode 100755 index 00000000000..199893a9428 --- /dev/null +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Data preparation. +# We can get the user_files_path by using the file table function as a trick; alternatively, it can be obtained with a query such as: +# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" +user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +logs_dir=${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME} + +rm -rf ${logs_dir} + +mkdir -p ${logs_dir}/ + +for i in {1..10} +do + echo $i >> ${logs_dir}/a.txt +done + +${CLICKHOUSE_CLIENT} -n --query=" +DROP TABLE IF EXISTS file_log; +DROP TABLE IF EXISTS table_to_store_data; +DROP TABLE IF EXISTS file_log_mv; + +CREATE TABLE file_log ( + id Int64 +) ENGINE = FileLog('${logs_dir}/', 'CSV'); + +CREATE TABLE table_to_store_data ( + id Int64 +) ENGINE = MergeTree +ORDER BY id; + +CREATE MATERIALIZED VIEW file_log_mv TO table_to_store_data AS + SELECT id + FROM file_log + WHERE id NOT IN ( + SELECT id + FROM table_to_store_data + WHERE id IN ( + SELECT id + FROM file_log + ) + ); +" + +function count() +{ + COUNT=$(${CLICKHOUSE_CLIENT} --query "select count() from table_to_store_data;") + echo $COUNT +} + +for i in {1..10} +do + [[ $(count) -gt 0 ]] && break + sleep 1 +done + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM table_to_store_data ORDER BY id;" + +for i in {1..20} +do + echo $i >> ${logs_dir}/a.txt +done + +for i in {1..10} +do + [[ $(count) -gt 10 ]] && break + sleep 1 +done + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM table_to_store_data ORDER BY id;" + +${CLICKHOUSE_CLIENT} -n --query=" +DROP TABLE file_log; +DROP TABLE table_to_store_data; +DROP TABLE file_log_mv; +" + +rm -rf ${logs_dir} diff --git a/tests/queries/0_stateless/02968_url_args.reference b/tests/queries/0_stateless/02968_url_args.reference new file mode 100644 index 00000000000..aa19e45301c --- /dev/null +++ b/tests/queries/0_stateless/02968_url_args.reference @@ -0,0 +1 @@ +CREATE TABLE default.a\n(\n `x` Int64\n)\nENGINE = URL(\'https://example.com/\', \'CSV\',
headers(\'foo\' = \'bar\')) diff --git a/tests/queries/0_stateless/02968_url_args.sql b/tests/queries/0_stateless/02968_url_args.sql new file mode 100644 index 00000000000..8bee9fec0ac --- /dev/null +++ b/tests/queries/0_stateless/02968_url_args.sql @@ -0,0 +1,2 @@ +create table a (x Int64) engine URL('https://example.com/', CSV, headers('foo' = 'bar')); +show create a; diff --git a/tests/queries/0_stateless/02969_archive_seek.reference b/tests/queries/0_stateless/02969_archive_seek.reference new file mode 100644 index 00000000000..a6937569dc7 --- /dev/null +++ b/tests/queries/0_stateless/02969_archive_seek.reference @@ -0,0 +1 @@ +10551038310762432828 diff --git a/tests/queries/0_stateless/02969_archive_seek.sh b/tests/queries/0_stateless/02969_archive_seek.sh new file mode 100755 index 00000000000..65507aa854a --- /dev/null +++ b/tests/queries/0_stateless/02969_archive_seek.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select sum(cityHash64(*)) from file('$CURDIR/data_parquet/02969.zip :: u.parquet') settings max_threads=4, max_read_buffer_size=1000" \ No newline at end of file diff --git a/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.reference b/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.reference new file mode 100644 index 00000000000..433ef84b3bf --- /dev/null +++ b/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.reference @@ -0,0 +1,4 @@ +10000 +0 +0 +0 diff --git a/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.sql b/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.sql new file mode 100644 index 00000000000..361fd7c7a4e --- /dev/null +++ b/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS t_subcolumns_if; + +CREATE TABLE t_subcolumns_if (id Nullable(Int64)) ENGINE=MergeTree ORDER BY tuple(); + +INSERT INTO t_subcolumns_if SELECT number::Nullable(Int64) as number FROM numbers(10000); + +SELECT + sum(multiIf(id IS NOT NULL, 1, 0)) +FROM t_subcolumns_if +SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 1; + +SELECT + sum(multiIf(id IS NULL, 1, 0)) +FROM t_subcolumns_if +SETTINGS allow_experimental_analyzer = 0, optimize_functions_to_subcolumns = 1; + +SELECT + sum(multiIf(id IS NULL, 1, 0)) +FROM t_subcolumns_if +SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 0; + +SELECT + sum(multiIf(id IS NULL, 1, 0)) +FROM t_subcolumns_if +SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 1; + +DROP TABLE IF EXISTS t_subcolumns_if; diff --git a/tests/queries/0_stateless/02970_visible_width_behavior.reference b/tests/queries/0_stateless/02970_visible_width_behavior.reference new file mode 100644 index 00000000000..006be015ed7 --- /dev/null +++ b/tests/queries/0_stateless/02970_visible_width_behavior.reference @@ -0,0 +1,5 @@ +28 +19 +28 +19 +28 diff --git a/tests/queries/0_stateless/02970_visible_width_behavior.sql b/tests/queries/0_stateless/02970_visible_width_behavior.sql new file mode 100644 index 00000000000..efaa8852c34 --- /dev/null +++ b/tests/queries/0_stateless/02970_visible_width_behavior.sql @@ -0,0 +1,6 @@ +SELECT visibleWidth('ClickHouse是一个很好的数据库'); +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS function_visible_width_behavior = 0; +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS 
function_visible_width_behavior = 1; +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS function_visible_width_behavior = 2; -- { serverError BAD_ARGUMENTS } +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS compatibility = '23.12'; +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS compatibility = '24.1'; diff --git a/tests/queries/0_stateless/data_parquet/02969.zip b/tests/queries/0_stateless/data_parquet/02969.zip new file mode 100644 index 00000000000..4c4c90261d0 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/02969.zip differ