diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 100ec3b3b2c..039f6ff3766 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -59,16 +59,17 @@ jobs: uses: ./.github/workflows/reusable_docker.yml with: data: ${{ needs.RunConfig.outputs.data }} - StyleCheck: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Style check - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 style_check.py --no-push + # Tested in MQ + # StyleCheck: + # needs: [RunConfig, BuildDockers] + # if: ${{ !failure() && !cancelled() }} + # uses: ./.github/workflows/reusable_test.yml + # with: + # test_name: Style check + # runner_type: style-checker + # data: ${{ needs.RunConfig.outputs.data }} + # run_command: | + # python3 style_check.py --no-push CompatibilityCheckX86: needs: [RunConfig, BuilderDebRelease] if: ${{ !failure() && !cancelled() }} @@ -447,6 +448,14 @@ jobs: test_name: Stateless tests (debug) runner_type: func-tester data: ${{ needs.RunConfig.outputs.data }} + FunctionalStatelessTestAsanAzure: + needs: [RunConfig, BuilderDebAsan] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Stateless tests (azure, asan) + runner_type: func-tester + data: ${{ needs.RunConfig.outputs.data }} ############################################################################################## ############################ FUNCTIONAl STATEFUL TESTS ####################################### ############################################################################################## @@ -597,6 +606,14 @@ jobs: test_name: Stress test (tsan) runner_type: stress-tester data: ${{ needs.RunConfig.outputs.data }} + StressTestTsanAzure: + needs: [RunConfig, BuilderDebTsan] + if: ${{ !failure() && !cancelled() }} + uses: ./.github/workflows/reusable_test.yml + with: + test_name: Stress test (azure, tsan) + runner_type: stress-tester + data: ${{ needs.RunConfig.outputs.data }} StressTestMsan: needs: [RunConfig, BuilderDebMsan] if: ${{ !failure() && !cancelled() }} diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c2e76de5e14..907b4e79617 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -158,7 +158,7 @@ jobs: # FinishCheck: if: ${{ !failure() && !cancelled() }} - needs: [Tests_1, Tests_2, Builds_1_Report, Builds_2_Report] + needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2] runs-on: [self-hosted, style-checker] steps: - name: Check out repository code @@ -171,7 +171,7 @@ jobs: - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 finish_check.py + python3 finish_check.py ${{ (contains(needs.*.result, 'failure') && github.event_name == 'merge_group') && '--pipeline-failure' || '' }} - name: Auto merge if approved if: ${{ github.event_name != 'merge_group' }} run: | diff --git a/.gitmessage b/.gitmessage index 797446edd49..89ee7d35d23 100644 --- a/.gitmessage +++ b/.gitmessage @@ -16,7 +16,7 @@ #ci_set_reduced #ci_set_arm #ci_set_integration -#ci_set_analyzer +#ci_set_old_analyzer ## To run specified job in CI: #job_ diff --git a/CHANGELOG.md b/CHANGELOG.md index dd88f3ee2c7..f40c42c4462 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,9 +38,9 @@ * Optimized function `dotProduct` to omit unnecessary and expensive memory 
copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)).
 * 30x faster printing for 256-bit integers. [#61100](https://github.com/ClickHouse/ClickHouse/pull/61100) ([Raúl Marín](https://github.com/Algunenano)).
 * If the table's primary key contains mostly useless columns, don't keep them in memory. This is controlled by a new setting `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` with the value `0.9` by default, which means: for a composite primary key, if a column changes its value for at least 0.9 of all the times, the next columns after it will be not loaded. [#60255](https://github.com/ClickHouse/ClickHouse/pull/60255) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* Improve the performance of serialized aggregation method when involving multiple `Nullable` columns. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
-* Lazy build JSON's output to improve performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
-* Make HTTP/HTTPs connections with external services, such as AWS S3 reusable for all uses cases. Even when response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
+* Improve the performance of serialized aggregation methods when involving multiple `Nullable` columns. [#55809](https://github.com/ClickHouse/ClickHouse/pull/55809) ([Amos Bird](https://github.com/amosbird)).
+* Lazily build JSON output to improve the performance of ALL JOIN. [#58278](https://github.com/ClickHouse/ClickHouse/pull/58278) ([LiuNeng](https://github.com/liuneng1994)).
+* Make HTTP/HTTPS connections with external services, such as AWS S3, reusable for all use cases, even when the response is 3xx or 4xx. [#58845](https://github.com/ClickHouse/ClickHouse/pull/58845) ([Sema Checherinda](https://github.com/CheSema)).
 * Improvements to aggregate functions `argMin` / `argMax` / `any` / `anyLast` / `anyHeavy`, as well as `ORDER BY {u8/u16/u32/u64/i8/i16/u32/i64) LIMIT 1` queries. [#58640](https://github.com/ClickHouse/ClickHouse/pull/58640) ([Raúl Marín](https://github.com/Algunenano)).
 * Trivial optimization for column's filter. Peak memory can be reduced to 44% of the original in some cases. [#59698](https://github.com/ClickHouse/ClickHouse/pull/59698) ([李扬](https://github.com/taiyang-li)).
 * Execute `multiIf` function in a columnar fashion when the result type's underlying type is a number. [#60384](https://github.com/ClickHouse/ClickHouse/pull/60384) ([李扬](https://github.com/taiyang-li)).
@@ -49,7 +49,7 @@
 * Optimize data movement between columns of a Nullable number or a Nullable string, which improves some micro-benchmarks. [#60846](https://github.com/ClickHouse/ClickHouse/pull/60846) ([李扬](https://github.com/taiyang-li)).
 * Operations with the filesystem cache will suffer less from the lock contention. [#61066](https://github.com/ClickHouse/ClickHouse/pull/61066) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Optimize array join and other JOINs by preventing a wrong compiler's optimization. Close [#61074](https://github.com/ClickHouse/ClickHouse/issues/61074). [#61075](https://github.com/ClickHouse/ClickHouse/pull/61075) ([李扬](https://github.com/taiyang-li)).
-* If a query with a syntax error contained `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put to AST. But the letter A in AST means "abstract" which means it should not contain heavyweight objects. Parts of AST can be created and discarded during parsing, including a large number of backtracking. This leads to slowness on the parsing side and consequently allows DoS by a readonly user. But the main problem is that it prevents progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* If a query with a syntax error contained the `COLUMNS` matcher with a regular expression, the regular expression was compiled each time during the parser's backtracking, instead of being compiled once. This was a fundamental error. The compiled regexp was put into the AST, but the "A" in AST stands for "abstract", which means it should not contain heavyweight objects. Parts of the AST can be created and discarded during parsing, including during a large amount of backtracking. This leads to slowness on the parsing side and consequently allows a DoS by a readonly user. But the main problem is that it prevents progress in fuzzers. [#61543](https://github.com/ClickHouse/ClickHouse/pull/61543) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Add a new analyzer pass to optimize the IN operator for a single value. [#61564](https://github.com/ClickHouse/ClickHouse/pull/61564) ([LiuNeng](https://github.com/liuneng1994)).
 * DNSResolver shuffles set of resolved IPs which is needed to uniformly utilize multiple endpoints of AWS S3. [#60965](https://github.com/ClickHouse/ClickHouse/pull/60965) ([Sema Checherinda](https://github.com/CheSema)).
diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake
index 9d53b2004b4..a3523203912 100644
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@@ -8,9 +8,6 @@ option (SANITIZE "Enable one of the code sanitizers" "")
 
 set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
 
-# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). Intentionally not doing this because
-# 1. out-of-source suppressions are awkward 2.
it seems ignore lists don't work after the Clang v16 upgrade (#49829) - if (SANITIZE) if (SANITIZE STREQUAL "address") set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope") diff --git a/contrib/curl b/contrib/curl index 1a05e833f8f..de7b3e89218 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit 1a05e833f8f7140628b27882b10525fd9ec4b873 +Subproject commit de7b3e89218467159a7af72d58cea8425946e97d diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index e74629e57b3..dcc1c7c42e7 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -33,14 +33,15 @@ set (SRCS "${LIBRARY_DIR}/lib/curl_memrchr.c" "${LIBRARY_DIR}/lib/curl_multibyte.c" "${LIBRARY_DIR}/lib/curl_ntlm_core.c" - "${LIBRARY_DIR}/lib/curl_ntlm_wb.c" "${LIBRARY_DIR}/lib/curl_path.c" "${LIBRARY_DIR}/lib/curl_range.c" "${LIBRARY_DIR}/lib/curl_rtmp.c" "${LIBRARY_DIR}/lib/curl_sasl.c" + "${LIBRARY_DIR}/lib/curl_sha512_256.c" "${LIBRARY_DIR}/lib/curl_sspi.c" "${LIBRARY_DIR}/lib/curl_threads.c" "${LIBRARY_DIR}/lib/curl_trc.c" + "${LIBRARY_DIR}/lib/cw-out.c" "${LIBRARY_DIR}/lib/dict.c" "${LIBRARY_DIR}/lib/doh.c" "${LIBRARY_DIR}/lib/dynbuf.c" @@ -98,6 +99,7 @@ set (SRCS "${LIBRARY_DIR}/lib/psl.c" "${LIBRARY_DIR}/lib/rand.c" "${LIBRARY_DIR}/lib/rename.c" + "${LIBRARY_DIR}/lib/request.c" "${LIBRARY_DIR}/lib/rtsp.c" "${LIBRARY_DIR}/lib/select.c" "${LIBRARY_DIR}/lib/sendf.c" diff --git a/contrib/curl-cmake/curl_config.h b/contrib/curl-cmake/curl_config.h index 4d4c2972f57..9535e9a3a4f 100644 --- a/contrib/curl-cmake/curl_config.h +++ b/contrib/curl-cmake/curl_config.h @@ -38,6 +38,7 @@ #define HAVE_ARPA_INET_H #define HAVE_ERRNO_H +#define HAVE_GETSOCKNAME #define HAVE_FCNTL_H #define HAVE_NETDB_H #define HAVE_NETINET_IN_H diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 592c3e36ef7..62dcd2ca997 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -14,11 +14,14 @@ RUN curl -o krb5-libs-1.10.3-65.el6.x86_64.rpm ftp://ftp.pbone.net/mirror/vault. 
rpm -Uvh libkadm5-1.10.3-65.el6.x86_64.rpm libss-1.41.12-24.el6.x86_64.rpm krb5-libs-1.10.3-65.el6.x86_64.rpm krb5-workstation-1.10.3-65.el6.x86_64.rpm libcom_err-1.41.12-24.el6.x86_64.rpm && \ rm -fr *.rpm +ADD https://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz /tmp/commons-daemon-1.0.15-src.tar.gz + RUN cd /tmp && \ - curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ tar xzf commons-daemon-1.0.15-src.tar.gz && \ cd commons-daemon-1.0.15-src/src/native/unix && \ ./configure && \ make && \ cp ./jsvc /usr/local/hadoop-2.7.0/sbin && \ - [ -e /usr/local/hadoop ] || ln -s ./hadoop-2.7.0 /usr/local/hadoop + cd /tmp && \ + rm -rf commons-daemon-1.0.15-src* && \ + { [ -e /usr/local/hadoop ] || ln -s ./hadoop-2.7.0 /usr/local/hadoop; } diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 6e19e57ed53..09a9f51084b 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -19,7 +19,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # install test configs /usr/share/clickhouse-test/config/install.sh -azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & +azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence & ./setup_minio.sh stateful config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -87,7 +87,7 @@ function start() tail -n1000 /var/log/clickhouse-server/clickhouse-server.log break fi - timeout 120 service clickhouse-server start + timeout 120 sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid sleep 0.5 counter=$((counter + 1)) done diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 55dfe5e2e08..4d2c2e6f466 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -42,12 +42,6 @@ source /utils.lib # install test configs /usr/share/clickhouse-test/config/install.sh -if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then - echo "Azure is disabled" -else - azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & -fi - ./setup_minio.sh stateless ./setup_hdfs_minicluster.sh @@ -97,12 +91,11 @@ if [ "$NUM_TRIES" -gt "1" ]; then export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US_MAX=10000 mkdir -p /var/run/clickhouse-server - # simplest way to forward env variables to server - sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid -else - sudo clickhouse start fi +# simplest way to forward env variables to server +sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon --pid-file /var/run/clickhouse-server/clickhouse-server.pid + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo sed -i "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml @@ -212,6 +205,14 @@ function run_tests() ADDITIONAL_OPTIONS+=('--s3-storage') fi + if [[ -n "$USE_AZURE_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + # to disable the same tests + ADDITIONAL_OPTIONS+=('--s3-storage') + # azurite is slow, but with these two settings it can be 
super slow + ADDITIONAL_OPTIONS+=('--no-random-settings') + ADDITIONAL_OPTIONS+=('--no-random-merge-tree-settings') + fi + if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--shared-catalog') fi @@ -286,7 +287,7 @@ stop_logs_replication failed_to_save_logs=0 for table in query_log zookeeper_log trace_log transactions_info_log metric_log do - err=$( { clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst; } 2>&1 ) + err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes") echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index b69f1d28fcf..23f942a00a2 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -279,7 +279,7 @@ function check_logs_for_critical_errors() function collect_query_and_trace_logs() { - for table in query_log trace_log + for table in query_log trace_log metric_log do clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: done diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 4caa9e48885..6d121ba4142 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -52,7 +52,6 @@ export ZOOKEEPER_FAULT_INJECTION=1 # available for dump via clickhouse-local configure -azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateless # to have a proper environment config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml diff --git a/docs/changelogs/v23.8.13.25-lts.md b/docs/changelogs/v23.8.13.25-lts.md new file mode 100644 index 00000000000..3452621556a --- /dev/null +++ b/docs/changelogs/v23.8.13.25-lts.md @@ -0,0 +1,37 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.8.13.25-lts (37e034f903e) FIXME as compared to v23.8.12.13-lts (bdbd0d87e5d) + +#### Improvement +* Backported in [#61930](https://github.com/ClickHouse/ClickHouse/issues/61930): Fixed accounting of memory allocated before attaching thread to a query or a user. [#56089](https://github.com/ClickHouse/ClickHouse/pull/56089) ([Nikita Taranov](https://github.com/nickitat)). + +#### Build/Testing/Packaging Improvement +* Backported in [#62007](https://github.com/ClickHouse/ClickHouse/issues/62007): Remove from the Keeper Docker image the volumes at /etc/clickhouse-keeper and /var/log/clickhouse-keeper. [#61683](https://github.com/ClickHouse/ClickHouse/pull/61683) ([Tristan](https://github.com/Tristan971)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix REPLACE/MOVE PARTITION with zero-copy replication [#54193](https://github.com/ClickHouse/ClickHouse/pull/54193) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix ATTACH query with external ON CLUSTER [#61365](https://github.com/ClickHouse/ClickHouse/pull/61365) ([Nikolay Degterinsky](https://github.com/evillique)). +* Cancel merges before removing moved parts [#61610](https://github.com/ClickHouse/ClickHouse/pull/61610) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+* Mark CANNOT_PARSE_ESCAPE_SEQUENCE error as parse error to be able to skip it in row input formats [#61883](https://github.com/ClickHouse/ClickHouse/pull/61883) ([Kruglov Pavel](https://github.com/Avogar)).
+* Try to fix segfault in Hive engine [#62578](https://github.com/ClickHouse/ClickHouse/pull/62578) ([Nikolay Degterinsky](https://github.com/evillique)).
+
+#### CI Fix or Improvement (changelog entry is not required)
+
+* Backported in [#62201](https://github.com/ClickHouse/ClickHouse/issues/62201):. [#62190](https://github.com/ClickHouse/ClickHouse/pull/62190) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+* Backported in [#62796](https://github.com/ClickHouse/ClickHouse/issues/62796): We won't fail the job when GH fails to retrieve the job ID and URLs. [#62651](https://github.com/ClickHouse/ClickHouse/pull/62651) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Backported in [#62968](https://github.com/ClickHouse/ClickHouse/issues/62968):. [#62932](https://github.com/ClickHouse/ClickHouse/pull/62932) ([Robert Schulze](https://github.com/rschu1ze)).
+
+#### NO CL CATEGORY
+
+* Backported in [#62585](https://github.com/ClickHouse/ClickHouse/issues/62585):. [#60078](https://github.com/ClickHouse/ClickHouse/pull/60078) ([Maksim Kita](https://github.com/kitaisreal)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Throw on query timeout in ZooKeeperRetries [#60922](https://github.com/ClickHouse/ClickHouse/pull/60922) ([Antonio Andelic](https://github.com/antonio2368)).
+
diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md
index 27bdc20ec57..4fcd9272f5f 100644
--- a/docs/en/engines/database-engines/replicated.md
+++ b/docs/en/engines/database-engines/replicated.md
@@ -4,7 +4,7 @@ sidebar_position: 30
 sidebar_label: Replicated
 ---
 
-# [experimental] Replicated
+# Replicated
 
 The engine is based on the [Atomic](../../engines/database-engines/atomic.md) engine. It supports replication of metadata via DDL log being written to ZooKeeper and executed on all of the replicas for a given database.
diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md
index 96e6bab6997..dbd1c270a4a 100644
--- a/docs/en/engines/table-engines/integrations/hdfs.md
+++ b/docs/en/engines/table-engines/integrations/hdfs.md
@@ -8,6 +8,8 @@ sidebar_label: HDFS
 
 This engine provides integration with the [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
 
+This feature is not supported by ClickHouse engineers, and it is known to be of sketchy quality. In case of any problems, fix them yourself and submit a pull request.
+
 ## Usage {#usage}
 
 ``` sql
diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md
index e0a66022d37..a8808e376e0 100644
--- a/docs/en/getting-started/example-datasets/recipes.md
+++ b/docs/en/getting-started/example-datasets/recipes.md
@@ -10,7 +10,8 @@ The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.
 1. Go to the download page [https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset).
 1. Accept Terms and Conditions and download zip file.
-1. Unpack the zip file with `unzip`. You will get the `full_dataset.csv` file.
+1. Optional: validate the zip file with `md5sum dataset.zip`; the checksum should equal `3a168dfd0912bb034225619b3586ce76`.
+1. Unpack the zip file with `unzip dataset.zip`. You will get the `full_dataset.csv` file in the `dataset` directory.
 
 ## Create a Table
 
@@ -72,7 +73,7 @@ Result:
 
 ``` text
 ┌─count()─┐
-│ 2231141 │
+│ 2231142 │
 └─────────┘
 ```
 
@@ -115,7 +116,7 @@ Result:
 │ egg │ 160507 │
 │ baking powder │ 148277 │
 │ lemon juice │ 146414 │
-│ Salt │ 122557 │
+│ Salt │ 122558 │
 │ cinnamon │ 117927 │
 │ sour cream │ 116682 │
 │ cream cheese │ 114423 │
diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 45c960c6b6f..eb1a3ba1dbc 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -327,7 +327,9 @@ Use buffering to avoid situations where a query processing error occurred after
 
 ## Setting a role with query parameters {#setting-role-with-query-parameters}
 
-In certain scenarios, it might be required to set the granted role first, before executing the statement itself.
+This is a new feature added in ClickHouse 24.4.
+
+In specific scenarios, setting the granted role first might be required before executing the statement itself.
 However, it is not possible to send `SET ROLE` and the statement together, as multi-statements are not allowed:
 
 ```
@@ -346,7 +348,7 @@ To overcome this limitation, you could use the `role` query parameter instead:
 curl -sS "http://localhost:8123?role=my_role" --data-binary "SELECT * FROM my_table;"
 ```
 
-This will be an equivalent of executing `SET ROLE my_role` before the statement.
+This will be the equivalent of executing `SET ROLE my_role` before the statement.
 
 Additionally, it is possible to specify multiple `role` query parameters:
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 869a0be2574..5518fd7e1ec 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -7,6 +7,8 @@ toc_max_heading_level: 2
 
 # Core Settings
 
+All of the settings below are also available in the table [system.settings](/docs/en/operations/system-tables/settings).
+
 ## additional_table_filters
 
 An additional filter expression that is applied after reading
@@ -3931,19 +3933,6 @@ For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, c
 Supported only with experimental analyzer (`allow_experimental_analyzer = 1`).
 :::
 
-## allow_experimental_database_replicated {#allow_experimental_database_replicated}
-
-Enables to create databases with [Replicated](../../engines/database-engines/replicated.md) engine.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-Default value: `0`.
-
-Cloud default value: `1`.
-
 ## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
 
 Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md
index 119684ba68d..ed22679a3e6 100644
--- a/docs/en/operations/tips.md
+++ b/docs/en/operations/tips.md
@@ -111,7 +111,7 @@ On newer Linux kernels transparent huge pages are alright.
$ echo 'madvise' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled ``` -If you want to modify the transparent huge pages setting permanently, editing the `/etc/default/grub` to add the `transparent_hugepage=never` to the `GRUB_CMDLINE_LINUX_DEFAULT` option: +If you want to modify the transparent huge pages setting permanently, editing the `/etc/default/grub` to add the `transparent_hugepage=madvise` to the `GRUB_CMDLINE_LINUX_DEFAULT` option: ```bash $ GRUB_CMDLINE_LINUX_DEFAULT="transparent_hugepage=madvise ..." diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 26351301a3b..694a69b64cc 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -3213,6 +3213,74 @@ Result: └───────────────────────────────────────┘ ``` +## connectionId + +Retrieves the connection ID of the client that submitted the current query and returns it as a UInt64 integer. + +**Syntax** + +```sql +connectionId() +``` + +**Parameters** + +None. + +**Returned value** + +Returns an integer of type UInt64. + +**Implementation details** + +This function is most useful in debugging scenarios or for internal purposes within the MySQL handler. It was created for compatibility with [MySQL's `CONNECTION_ID` function](https://dev.mysql.com/doc/refman/8.0/en/information-functions.html#function_connection-id) It is not typically used in production queries. + +**Example** + +Query: + +```sql +SELECT connectionId(); +``` + +```response +0 +``` + +## connection_id + +An alias of `connectionId`. Retrieves the connection ID of the client that submitted the current query and returns it as a UInt64 integer. + +**Syntax** + +```sql +connection_id() +``` + +**Parameters** + +None. + +**Returned value** + +Returns an integer of type UInt64. + +**Implementation details** + +This function is most useful in debugging scenarios or for internal purposes within the MySQL handler. It was created for compatibility with [MySQL's `CONNECTION_ID` function](https://dev.mysql.com/doc/refman/8.0/en/information-functions.html#function_connection-id) It is not typically used in production queries. + +**Example** + +Query: + +```sql +SELECT connection_id(); +``` + +```response +0 +``` + ## getClientHTTPHeader Get the value of an HTTP header. diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index f18f029740e..afec43cd6f4 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -79,9 +79,9 @@ round(expression [, decimal_places]) The rounded number of the same type as the input number. -### Examples +**Examples** -**Example of use with Float** +Example of usage with Float: ``` sql SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3; @@ -95,7 +95,7 @@ SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3; └─────┴──────────────────────────┘ ``` -**Example of use with Decimal** +Example of usage with Decimal: ``` sql SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3; @@ -124,9 +124,7 @@ SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIM └────────┴──────────────────────────────────────────────────┘ ``` -**Examples of rounding** - -Rounding to the nearest number. 
+Examples of rounding to the nearest number: ``` text round(3.2, 0) = 3 @@ -183,9 +181,7 @@ roundBankers(expression [, decimal_places]) A value rounded by the banker’s rounding method. -### Examples - -**Example of use** +**Examples** Query: @@ -210,7 +206,7 @@ Result: └─────┴───┘ ``` -**Examples of Banker’s rounding** +Examples of Banker’s rounding: ``` text roundBankers(0.4) = 0 @@ -226,25 +222,180 @@ roundBankers(10.755, 2) = 10.76 - [round](#rounding_functions-round) -## roundToExp2(num) +## roundToExp2 -Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to the nearest (whole non-negative) degree of two. +Accepts a number. If the number is less than one, it returns `0`. Otherwise, it rounds the number down to the nearest (whole non-negative) degree of two. -## roundDuration(num) +**Syntax** -Accepts a number. If the number is less than one, it returns 0. Otherwise, it rounds the number down to numbers from the set: 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000. +```sql +roundToExp2(num) +``` -## roundAge(num) +**Parameters** -Accepts a number. If the number is -- smaller than 1, it returns 0, -- between 1 and 17, it returns 17, -- between 18 and 24, it returns 18, -- between 25 and 34, it returns 25, -- between 35 and 44, it returns 35, -- between 45 and 54, it returns 45, -- larger than 55, it returns 55. +- `num`: A number representing an age in years. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md). -## roundDown(num, arr) +**Returned value** + +- `0`, for `num` $\lt 1$. [UInt8](../data-types/int-uint.md). +- `num` rounded down to the nearest (whole non-negative) degree of two. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md) equivalent to the input type. + +**Example** + +Query: + +```sql +SELECT *, roundToExp2(*) FROM system.numbers WHERE number IN (0, 2, 5, 10, 19, 50) +``` + +Result: + +```response +┌─number─┬─roundToExp2(number)─┐ +│ 0 │ 0 │ +│ 2 │ 2 │ +│ 5 │ 4 │ +│ 10 │ 8 │ +│ 19 │ 16 │ +│ 50 │ 32 │ +└────────┴─────────────────────┘ +``` + +## roundDuration + +Accepts a number. If the number is less than one, it returns `0`. Otherwise, it rounds the number down to numbers from the set of commonly used durations: `1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000`. + +**Syntax** + +```sql +roundDuration(num) +``` + +**Parameters** + +- `num`: A number to round to one of the numbers in the set of common durations. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md). + +**Returned value** + +- `0`, for `num` $\lt 1$. +- Otherwise, one of: `1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000`. [UInt16](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT *, roundDuration(*) FROM system.numbers WHERE number IN (0, 9, 19, 47, 101, 149, 205, 271, 421, 789, 1423, 2345, 4567, 9876, 24680, 42573) +``` + +Result: + +```response +┌─number─┬─roundDuration(number)─┐ +│ 0 │ 0 │ +│ 9 │ 1 │ +│ 19 │ 10 │ +│ 47 │ 30 │ +│ 101 │ 60 │ +│ 149 │ 120 │ +│ 205 │ 180 │ +│ 271 │ 240 │ +│ 421 │ 300 │ +│ 789 │ 600 │ +│ 1423 │ 1200 │ +│ 2345 │ 1800 │ +│ 4567 │ 3600 │ +│ 9876 │ 7200 │ +│ 24680 │ 18000 │ +│ 42573 │ 36000 │ +└────────┴───────────────────────┘ +``` + +## roundAge + +Accepts a number within various commonly used ranges of human age and returns either a maximum or a minimum within that range. 
+ +**Syntax** + +```sql +roundAge(num) +``` + +**Parameters** + +- `age`: A number representing an age in years. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md). + +**Returned value** + +- Returns `0`, for $age \lt 1$. +- Returns `17`, for $1 \leq age \leq 17$. +- Returns `18`, for $18 \leq age \leq 24$. +- Returns `25`, for $25 \leq age \leq 34$. +- Returns `35`, for $35 \leq age \leq 44$. +- Returns `45`, for $45 \leq age \leq 54$. +- Returns `55`, for $age \geq 55$. + +Type: [UInt8](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT *, roundAge(*) FROM system.numbers WHERE number IN (0, 5, 20, 31, 37, 54, 72); +``` + +Result: + +```response +┌─number─┬─roundAge(number)─┐ +│ 0 │ 0 │ +│ 5 │ 17 │ +│ 20 │ 18 │ +│ 31 │ 25 │ +│ 37 │ 35 │ +│ 54 │ 45 │ +│ 72 │ 55 │ +└────────┴──────────────────┘ +``` + +## roundDown Accepts a number and rounds it down to an element in the specified array. If the value is less than the lowest bound, the lowest bound is returned. + +**Syntax** + +```sql +roundDown(num, arr) +``` + +**Parameters** + +- `num`: A number to round down. [Numeric](../data-types/int-uint.md). +- `arr`: Array of elements to round `age` down to. [Array](../data-types/array.md) of [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md) type. + +**Returned value** + +- Number rounded down to an element in `arr`. If the value is less than the lowest bound, the lowest bound is returned. [UInt](../data-types/int-uint.md)/[Float](../data-types/float.md) type deduced from the type of `arr`. + +**Example** + +Query: + +```sql +SELECT *, roundDown(*, [3, 4, 5]) FROM system.numbers WHERE number IN (0, 1, 2, 3, 4, 5) +``` + +Result: + +```response +┌─number─┬─roundDown(number, [3, 4, 5])─┐ +│ 0 │ 3 │ +│ 1 │ 3 │ +│ 2 │ 3 │ +│ 3 │ 3 │ +│ 4 │ 4 │ +│ 5 │ 5 │ +└────────┴──────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index a516f09d709..043686889c4 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -6,24 +6,29 @@ sidebar_label: Embedded Dictionaries # Functions for Working with Embedded Dictionaries +:::note In order for the functions below to work, the server config must specify the paths and addresses for getting all the embedded dictionaries. The dictionaries are loaded at the first call of any of these functions. If the reference lists can’t be loaded, an exception is thrown. -For information about creating reference lists, see the section “Dictionaries”. +As such, the examples shown in this section will throw an exception in [ClickHouse Fiddle](https://fiddle.clickhouse.com/) and in quick release and production deployments by default, unless first configured. +::: + +For information about creating reference lists, see the section [“Dictionaries”](../dictionaries#embedded-dictionaries). ## Multiple Geobases ClickHouse supports working with multiple alternative geobases (regional hierarchies) simultaneously, in order to support various perspectives on which countries certain regions belong to. -The ‘clickhouse-server’ config specifies the file with the regional hierarchy::`/opt/geo/regions_hierarchy.txt` +The ‘clickhouse-server’ config specifies the file with the regional hierarchy: -Besides this file, it also searches for files nearby that have the _ symbol and any suffix appended to the name (before the file extension). 
-For example, it will also find the file `/opt/geo/regions_hierarchy_ua.txt`, if present. +```/opt/geo/regions_hierarchy.txt``` -`ua` is called the dictionary key. For a dictionary without a suffix, the key is an empty string. +Besides this file, it also searches for files nearby that have the `_` symbol and any suffix appended to the name (before the file extension). +For example, it will also find the file `/opt/geo/regions_hierarchy_ua.txt`, if present. Here `ua` is called the dictionary key. For a dictionary without a suffix, the key is an empty string. -All the dictionaries are re-loaded in runtime (once every certain number of seconds, as defined in the builtin_dictionaries_reload_interval config parameter, or once an hour by default). However, the list of available dictionaries is defined one time, when the server starts. +All the dictionaries are re-loaded during runtime (once every certain number of seconds, as defined in the [`builtin_dictionaries_reload_interval`](../../operations/server-configuration-parameters/settings#builtin-dictionaries-reload-interval) config parameter, or once an hour by default). However, the list of available dictionaries is defined once, when the server starts. All functions for working with regions have an optional argument at the end – the dictionary key. It is referred to as the geobase. + Example: ``` sql @@ -32,13 +37,116 @@ regionToCountry(RegionID, '') – Uses the default dictionary: /opt/geo/regions_ regionToCountry(RegionID, 'ua') – Uses the dictionary for the 'ua' key: /opt/geo/regions_hierarchy_ua.txt ``` -### regionToCity(id\[, geobase\]) +### regionToName -Accepts a UInt32 number – the region ID from the geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0. +Accepts a region ID and geobase and returns a string of the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned. -### regionToArea(id\[, geobase\]) +**Syntax** -Converts a region to an area (type 5 in the geobase). In every other way, this function is the same as ‘regionToCity’. +``` sql +regionToName(id\[, lang\]) +``` +**Parameters** + +- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. + +**Returned value** + +- Name of the region in the corresponding language specified by `geobase`. [String](../data-types/string). +- Otherwise, an empty string. + +**Example** + +Query: + +``` sql +SELECT regionToName(number::UInt32,'en') FROM numbers(0,5); +``` + +Result: + +``` text +┌─regionToName(CAST(number, 'UInt32'), 'en')─┐ +│ │ +│ World │ +│ USA │ +│ Colorado │ +│ Boulder County │ +└────────────────────────────────────────────┘ +``` + +### regionToCity + +Accepts a region ID from the geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0. + +**Syntax** + +```sql +regionToCity(id [, geobase]) +``` + +**Parameters** + +- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. + +**Returned value** + +- Region ID for the appropriate city, if it exists. [UInt32](../data-types/int-uint). +- 0, if there is none. 
+ +**Example** + +Query: + +```sql +SELECT regionToName(number::UInt32, 'en'), regionToCity(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +``` + +Result: + +```response +┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─id─┬─regionToName(regionToCity(CAST(number, 'UInt32')), 'en')─┐ +│ │ 0 │ │ +│ World │ 0 │ │ +│ USA │ 0 │ │ +│ Colorado │ 0 │ │ +│ Boulder County │ 0 │ │ +│ Boulder │ 5 │ Boulder │ +│ China │ 0 │ │ +│ Sichuan │ 0 │ │ +│ Chengdu │ 8 │ Chengdu │ +│ America │ 0 │ │ +│ North America │ 0 │ │ +│ Eurasia │ 0 │ │ +│ Asia │ 0 │ │ +└────────────────────────────────────────────┴────┴──────────────────────────────────────────────────────────┘ +``` + +### regionToArea + +Converts a region to an area (type 5 in the geobase). In every other way, this function is the same as [‘regionToCity’](#regiontocity). + +**Syntax** + +```sql +regionToArea(id [, geobase]) +``` + +**Parameters** + +- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. + +**Returned value** + +- Region ID for the appropriate area, if it exists. [UInt32](../data-types/int-uint). +- 0, if there is none. + +**Example** + +Query: ``` sql SELECT DISTINCT regionToName(regionToArea(toUInt32(number), 'ua')) @@ -46,6 +154,8 @@ FROM system.numbers LIMIT 15 ``` +Result: + ``` text ┌─regionToName(regionToArea(toUInt32(number), \'ua\'))─┐ │ │ @@ -66,16 +176,38 @@ LIMIT 15 └──────────────────────────────────────────────────────┘ ``` -### regionToDistrict(id\[, geobase\]) +### regionToDistrict Converts a region to a federal district (type 4 in the geobase). In every other way, this function is the same as ‘regionToCity’. +**Syntax** + +```sql +regionToDistrict(id [, geobase]) +``` + +**Parameters** + +- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. + +**Returned value** + +- Region ID for the appropriate city, if it exists. [UInt32](../data-types/int-uint). +- 0, if there is none. + +**Example** + +Query: + ``` sql SELECT DISTINCT regionToName(regionToDistrict(toUInt32(number), 'ua')) FROM system.numbers LIMIT 15 ``` +Result: + ``` text ┌─regionToName(regionToDistrict(toUInt32(number), \'ua\'))─┐ │ │ @@ -96,17 +228,103 @@ LIMIT 15 └──────────────────────────────────────────────────────────┘ ``` -### regionToCountry(id\[, geobase\]) +### regionToCountry -Converts a region to a country. In every other way, this function is the same as ‘regionToCity’. -Example: `regionToCountry(toUInt32(213)) = 225` converts Moscow (213) to Russia (225). +Converts a region to a country (type 3 in the geobase). In every other way, this function is the same as ‘regionToCity’. -### regionToContinent(id\[, geobase\]) +**Syntax** -Converts a region to a continent. In every other way, this function is the same as ‘regionToCity’. -Example: `regionToContinent(toUInt32(213)) = 10001` converts Moscow (213) to Eurasia (10001). +```sql +regionToCountry(id [, geobase]) +``` -### regionToTopContinent(id\[, geobase\]) +**Parameters** + +- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. + +**Returned value** + +- Region ID for the appropriate country, if it exists. [UInt32](../data-types/int-uint). +- 0, if there is none. 
+ +**Example** + +Query: + +``` sql +SELECT regionToName(number::UInt32, 'en'), regionToCountry(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +``` + +Result: + +``` text +┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─id─┬─regionToName(regionToCountry(CAST(number, 'UInt32')), 'en')─┐ +│ │ 0 │ │ +│ World │ 0 │ │ +│ USA │ 2 │ USA │ +│ Colorado │ 2 │ USA │ +│ Boulder County │ 2 │ USA │ +│ Boulder │ 2 │ USA │ +│ China │ 6 │ China │ +│ Sichuan │ 6 │ China │ +│ Chengdu │ 6 │ China │ +│ America │ 0 │ │ +│ North America │ 0 │ │ +│ Eurasia │ 0 │ │ +│ Asia │ 0 │ │ +└────────────────────────────────────────────┴────┴─────────────────────────────────────────────────────────────┘ +``` + +### regionToContinent + +Converts a region to a continent (type 1 in the geobase). In every other way, this function is the same as ‘regionToCity’. + +**Syntax** + +```sql +regionToContinent(id [, geobase]) +``` + +**Parameters** + +- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. + +**Returned value** + +- Region ID for the appropriate continent, if it exists. [UInt32](../data-types/int-uint). +- 0, if there is none. + +**Example** + +Query: + +``` sql +SELECT regionToName(number::UInt32, 'en'), regionToContinent(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +``` + +Result: + +``` text +┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─id─┬─regionToName(regionToContinent(CAST(number, 'UInt32')), 'en')─┐ +│ │ 0 │ │ +│ World │ 0 │ │ +│ USA │ 10 │ North America │ +│ Colorado │ 10 │ North America │ +│ Boulder County │ 10 │ North America │ +│ Boulder │ 10 │ North America │ +│ China │ 12 │ Asia │ +│ Sichuan │ 12 │ Asia │ +│ Chengdu │ 12 │ Asia │ +│ America │ 9 │ America │ +│ North America │ 10 │ North America │ +│ Eurasia │ 11 │ Eurasia │ +│ Asia │ 12 │ Asia │ +└────────────────────────────────────────────┴────┴───────────────────────────────────────────────────────────────┘ +``` + +### regionToTopContinent Finds the highest continent in the hierarchy for the region. @@ -116,37 +334,175 @@ Finds the highest continent in the hierarchy for the region. regionToTopContinent(id[, geobase]) ``` -**Arguments** +**Parameters** -- `id` — Region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint.md). -- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. +- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. **Returned value** -- Identifier of the top level continent (the latter when you climb the hierarchy of regions). +- Identifier of the top level continent (the latter when you climb the hierarchy of regions).[UInt32](../data-types/int-uint). - 0, if there is none. -Type: `UInt32`. +**Example** -### regionToPopulation(id\[, geobase\]) +Query: -Gets the population for a region. -The population can be recorded in files with the geobase. See the section “Dictionaries”. -If the population is not recorded for the region, it returns 0. -In the geobase, the population might be recorded for child regions, but not for parent regions. 
+``` sql +SELECT regionToName(number::UInt32, 'en'), regionToTopContinent(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +``` -### regionIn(lhs, rhs\[, geobase\]) +Result: + +``` text +┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─id─┬─regionToName(regionToTopContinent(CAST(number, 'UInt32')), 'en')─┐ +│ │ 0 │ │ +│ World │ 0 │ │ +│ USA │ 9 │ America │ +│ Colorado │ 9 │ America │ +│ Boulder County │ 9 │ America │ +│ Boulder │ 9 │ America │ +│ China │ 11 │ Eurasia │ +│ Sichuan │ 11 │ Eurasia │ +│ Chengdu │ 11 │ Eurasia │ +│ America │ 9 │ America │ +│ North America │ 9 │ America │ +│ Eurasia │ 11 │ Eurasia │ +│ Asia │ 11 │ Eurasia │ +└────────────────────────────────────────────┴────┴──────────────────────────────────────────────────────────────────┘ +``` + +### regionToPopulation + +Gets the population for a region. The population can be recorded in files with the geobase. See the section [“Dictionaries”](../dictionaries#embedded-dictionaries). If the population is not recorded for the region, it returns 0. In the geobase, the population might be recorded for child regions, but not for parent regions. + +**Syntax** + +``` sql +regionToPopulation(id[, geobase]) +``` + +**Parameters** + +- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. + +**Returned value** + +- Population for the region. [UInt32](../data-types/int-uint). +- 0, if there is none. + +**Example** + +Query: + +``` sql +SELECT regionToName(number::UInt32, 'en'), regionToPopulation(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +``` + +Result: + +``` text +┌─regionToName(CAST(number, 'UInt32'), 'en')─┬─population─┐ +│ │ 0 │ +│ World │ 4294967295 │ +│ USA │ 330000000 │ +│ Colorado │ 5700000 │ +│ Boulder County │ 330000 │ +│ Boulder │ 100000 │ +│ China │ 1500000000 │ +│ Sichuan │ 83000000 │ +│ Chengdu │ 20000000 │ +│ America │ 1000000000 │ +│ North America │ 600000000 │ +│ Eurasia │ 4294967295 │ +│ Asia │ 4294967295 │ +└────────────────────────────────────────────┴────────────┘ +``` + +### regionIn + +Checks whether a `lhs` region belongs to a `rhs` region. Returns a UInt8 number equal to 1 if it belongs, or 0 if it does not belong. + +**Syntax** + +``` sql +regionIn(lhs, rhs\[, geobase\]) +``` + +**Parameters** + +- `lhs` — Lhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). +- `rhs` — Rhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. + +**Returned value** + +- 1, if it belongs. [UInt8](../../sql-reference/data-types/int-uint). +- 0, if it doesn't belong. + +**Implementation details** -Checks whether a ‘lhs’ region belongs to a ‘rhs’ region. Returns a UInt8 number equal to 1 if it belongs, or 0 if it does not belong. The relationship is reflexive – any region also belongs to itself. -### regionHierarchy(id\[, geobase\]) +**Example** + +Query: + +``` sql +SELECT regionToName(n1.number::UInt32, 'en') || (regionIn(n1.number::UInt32, n2.number::UInt32) ? 
' is in ' : ' is not in ') || regionToName(n2.number::UInt32, 'en') FROM numbers(1,2) AS n1 CROSS JOIN numbers(1,5) AS n2; +``` + +Result: + +``` text +World is in World +World is not in USA +World is not in Colorado +World is not in Boulder County +World is not in Boulder +USA is in World +USA is in USA +USA is not in Colorado +USA is not in Boulder County +USA is not in Boulder +``` + +### regionHierarchy Accepts a UInt32 number – the region ID from the geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain. -Example: `regionHierarchy(toUInt32(213)) = [213,1,3,225,10001,10000]`. -### regionToName(id\[, lang\]) +**Syntax** -Accepts a UInt32 number – the region ID from the geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ‘ru’ is used. If the language is not supported, an exception is thrown. Returns a string – the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned. +``` sql +regionHierarchy(id\[, geobase\]) +``` -`ua` and `uk` both mean Ukrainian. +**Parameters** + +- `id` — Region ID from the geobase. [UInt32](../data-types/int-uint). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. + +**Returned value** + +- Array of region IDs consisting of the passed region and all parents along the chain. [Array](../data-types/array)([UInt32](../data-types/int-uint)). + +**Example** + +Query: + +``` sql +SELECT regionHierarchy(number::UInt32) AS arr, arrayMap(id -> regionToName(id, 'en'), arr) FROM numbers(5); +``` + +Result: + +``` text +┌─arr────────────┬─arrayMap(lambda(tuple(id), regionToName(id, 'en')), regionHierarchy(CAST(number, 'UInt32')))─┐ +│ [] │ [] │ +│ [1] │ ['World'] │ +│ [2,10,9,1] │ ['USA','North America','America','World'] │ +│ [3,2,10,9,1] │ ['Colorado','USA','North America','America','World'] │ +│ [4,3,2,10,9,1] │ ['Boulder County','Colorado','USA','North America','America','World'] │ +└────────────────┴──────────────────────────────────────────────────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index 06742ff74e2..7a6e2ab054c 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -19,25 +19,51 @@ Subquery is another `SELECT` query that may be specified in parenthesis inside ` ## FINAL Modifier -When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine. +When `FINAL` is specified, ClickHouse fully merges the data before returning the result. This also performs all data transformations that happen during merges for the given table engine. -It is applicable when selecting data from ReplacingMergeTree, SummingMergeTree, AggregatingMergeTree, CollapsingMergeTree and VersionedCollapsingMergeTree tables. +It is applicable when selecting data from from tables using the following table engines: +- `ReplacingMergeTree` +- `SummingMergeTree` +- `AggregatingMergeTree` +- `CollapsingMergeTree` +- `VersionedCollapsingMergeTree` `SELECT` queries with `FINAL` are executed in parallel. 
The [max_final_threads](../../../operations/settings/settings.md#max-final-threads) setting limits the number of threads used. -There are drawbacks to using `FINAL` (see below). - ### Drawbacks -Queries that use `FINAL` are executed slightly slower than similar queries that do not, because: +Queries that use `FINAL` execute slightly slower than similar queries that do not use `FINAL` because: - Data is merged during query execution. -- Queries with `FINAL` read primary key columns in addition to the columns specified in the query. +- Queries with `FINAL` may read primary key columns in addition to the columns specified in the query. -`FINAL` requires additional compute and memory resources, as the processing that normally would occur at merge time must occur in memory at the time of the query. However, using FINAL is sometimes necessary in order to produce accurate results, and is less expensive than running `OPTIMIZE` to force a merge. It is also sometimes possible to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). If you need to use FINAL in your queries in order to get the required results, then it is okay to do so but be aware of the additional processing required. +`FINAL` requires additional compute and memory resources because the processing that normally would occur at merge time must occur in memory at the time of the query. However, using FINAL is sometimes necessary in order to produce accurate results (as data may not yet be fully merged). It is less expensive than running `OPTIMIZE` to force a merge. + +As an alternative to using `FINAL`, it is sometimes possible to use different queries that assume the background processes of the `MergeTree` engine have not yet occurred and deal with it by applying an aggregation (for example, to discard duplicates). If you need to use `FINAL` in your queries in order to get the required results, it is okay to do so but be aware of the additional processing required. `FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile. +### Example Usage + +**Using the `FINAL` keyword** + +```sql +SELECT x, y FROM mytable FINAL WHERE x > 1; +``` + +**Using `FINAL` as a query-level setting** + +```sql +SELECT x, y FROM mytable WHERE x > 1 SETTINGS final = 1; +``` + +**Using `FINAL` as a session-level setting** + +```sql +SET final = 1; +SELECT x, y FROM mytable WHERE x > 1; +``` + ## Implementation Details If the `FROM` clause is omitted, data will be read from the `system.one` table. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index f9456e34a56..2b3607dcf08 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -3447,17 +3447,6 @@ SELECT FROM fuse_tbl ``` -## allow_experimental_database_replicated {#allow_experimental_database_replicated} - -Позволяет создавать базы данных с движком [Replicated](../../engines/database-engines/replicated.md). - -Возможные значения: - -- 0 — Disabled. -- 1 — Enabled. - -Значение по умолчанию: `0`. - ## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec} Устанавливает, как долго начальный DDL-запрос должен ждать, пока реплицированная база данных прецессирует предыдущие записи очереди DDL в секундах. 
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md index 0489606b3c1..bbc8b4fcb07 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md @@ -5,7 +5,7 @@ sidebar_position: 106 # argMax {#agg-function-argmax} -Вычисляет значение `arg` при максимальном значении `val`. +Вычисляет значение `arg` при максимальном значении `val`. Если несколько строк имеют одинаковое `val`, в которых равное значение является максимальным, то возвращаемое `arg` не является детерминированным. Обе части, arg и max, ведут себя как агрегатные функции, они обе пропускают Null во время обработки и возвращают не Null значения, если не Null значения доступны. **Синтаксис** @@ -49,3 +49,60 @@ SELECT argMax(user, salary), argMax(tuple(user, salary), salary) FROM salary; │ director │ ('director',5000) │ └──────────────────────┴─────────────────────────────────────┘ ``` + +**Дополнительный пример** + +```sql +CREATE TABLE test +( + a Nullable(String), + b Nullable(Int64) +) +ENGINE = Memory AS +SELECT * +FROM VALUES(('a', 1), ('b', 2), ('c', 2), (NULL, 3), (NULL, NULL), ('d', NULL)); + +select * from test; +┌─a────┬────b─┐ +│ a │ 1 │ +│ b │ 2 │ +│ c │ 2 │ +│ ᴺᵁᴸᴸ │ 3 │ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +│ d │ ᴺᵁᴸᴸ │ +└──────┴──────┘ + +SELECT argMax(a, b), max(b) FROM test; +┌─argMax(a, b)─┬─max(b)─┐ +│ b │ 3 │ -- argMax = 'b' потому что это первое not Null значение, max(b) из другой строки! +└──────────────┴────────┘ + +SELECT argMax(tuple(a), b) FROM test; +┌─argMax(tuple(a), b)─┐ +│ (NULL) │ -- Кортеж `Tuple`, который содержит только `NULL` значения является не `NULL` кортежем, поэтому агрегатыне функции не будут пропускать эту строку с `NULL` значениями. +└─────────────────────┘ + +SELECT (argMax((a, b), b) as t).1 argMaxA, t.2 argMaxB FROM test; +┌─argMaxA─┬─argMaxB─┐ +│ ᴺᵁᴸᴸ │ 3 │ -- Вы можете использовать кортеж Tuple и получить оба значения для соответсвующего max(b). +└─────────┴─────────┘ + +SELECT argMax(a, b), max(b) FROM test WHERE a IS NULL AND b IS NULL; +┌─argMax(a, b)─┬─max(b)─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ -- Все агрегированные строки содержат хотя бы одно `NULL` значение, поэтому все строки пропускаются и результатом будет `NULL`. +└──────────────┴────────┘ + +SELECT argMax(a, (b,a)) FROM test; +┌─argMax(a, tuple(b, a))─┐ +│ c │ -- Есть две строки с b=2, кортеж `Tuple` в функции `Max` позволяет получить не первый `arg`. +└────────────────────────┘ + +SELECT argMax(a, tuple(b)) FROM test; +┌─argMax(a, tuple(b))─┐ +│ b │ -- Кортеж `Tuple` может использоваться в `Max`, чтобы не пропускать `NULL` значения в `Max`. 
+└─────────────────────┘ +``` + +**Смотрите также** + +- [Tuple](/docs/ru/sql-reference/data-types/tuple.md) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index d757a9192ce..396cd3e646b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -918,11 +918,13 @@ bool Client::processWithFuzzing(const String & full_query) } -void Client::printHelpMessage(const OptionsDescription & options_description) +void Client::printHelpMessage(const OptionsDescription & options_description, bool verbose) { std::cout << options_description.main_description.value() << "\n"; std::cout << options_description.external_description.value() << "\n"; std::cout << options_description.hosts_and_ports_description.value() << "\n"; + if (verbose) + std::cout << "All settings are documented at https://clickhouse.com/docs/en/operations/settings/settings.\n\n"; std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n"; std::cout << "\nSee also: https://clickhouse.com/docs/en/integrations/sql-clients/cli\n"; } diff --git a/programs/client/Client.h b/programs/client/Client.h index 11d9dec97b1..bef948b3c1e 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -25,7 +25,7 @@ protected: String getName() const override { return "client"; } - void printHelpMessage(const OptionsDescription & options_description) override; + void printHelpMessage(const OptionsDescription & options_description, bool verbose) override; void addOptions(OptionsDescription & options_description) override; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 690e20b25be..612c62613ca 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -774,10 +774,12 @@ void LocalServer::processConfig() } -void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & options_description) +void LocalServer::printHelpMessage(const OptionsDescription & options_description, bool verbose) { std::cout << getHelpHeader() << "\n"; std::cout << options_description.main_description.value() << "\n"; + if (verbose) + std::cout << "All settings are documented at https://clickhouse.com/docs/en/operations/settings/settings.\n\n"; std::cout << getHelpFooter() << "\n"; std::cout << "In addition, --param_name=value can be specified for substitution of parameters for parametrized queries.\n"; std::cout << "\nSee also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/\n"; diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index 9b67aab02d4..4856e68ff9b 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -36,7 +36,7 @@ protected: String getName() const override { return "local"; } - void printHelpMessage(const OptionsDescription & options_description) override; + void printHelpMessage(const OptionsDescription & options_description, bool verbose) override; void addOptions(OptionsDescription & options_description) override; diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index aca0c4d2d53..c3bb42160ad 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -280,11 +280,11 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration /// Optional improvements in access control system. 
/// The default values are false because we need to be compatible with earlier access configurations - setEnabledUsersWithoutRowPoliciesCanReadRows(config_.getBool("access_control_improvements.users_without_row_policies_can_read_rows", false)); - setOnClusterQueriesRequireClusterGrant(config_.getBool("access_control_improvements.on_cluster_queries_require_cluster_grant", false)); - setSelectFromSystemDatabaseRequiresGrant(config_.getBool("access_control_improvements.select_from_system_db_requires_grant", false)); - setSelectFromInformationSchemaRequiresGrant(config_.getBool("access_control_improvements.select_from_information_schema_requires_grant", false)); - setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", false)); + setEnabledUsersWithoutRowPoliciesCanReadRows(config_.getBool("access_control_improvements.users_without_row_policies_can_read_rows", true)); + setOnClusterQueriesRequireClusterGrant(config_.getBool("access_control_improvements.on_cluster_queries_require_cluster_grant", true)); + setSelectFromSystemDatabaseRequiresGrant(config_.getBool("access_control_improvements.select_from_system_db_requires_grant", true)); + setSelectFromInformationSchemaRequiresGrant(config_.getBool("access_control_improvements.select_from_information_schema_requires_grant", true)); + setSettingsConstraintsReplacePrevious(config_.getBool("access_control_improvements.settings_constraints_replace_previous", true)); setTableEnginesRequireGrant(config_.getBool("access_control_improvements.table_engines_require_grant", false)); addStoragesFromMainConfig(config_, config_path_, get_zookeeper_function_); diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 2789423799a..e62a94389b6 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -165,7 +165,6 @@ private: /** ClickHouse query tree pass manager. * - * TODO: Support setting optimize_monotonous_functions_in_order_by. * TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column). */ diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 59990b9cd51..9cf5c433826 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -221,7 +221,8 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin key, DBMS_DEFAULT_BUFFER_SIZE, write_settings, - settings); + settings, + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); } void BackupWriterAzureBlobStorage::removeFile(const String & file_name) diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 2349c9ac9d1..58268971afe 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -109,7 +109,7 @@ RestorerFromBackup::~RestorerFromBackup() if (getNumFutures() > 0) { LOG_INFO(log, "Waiting for {} tasks to finish", getNumFutures()); - waitFutures(); + waitFutures(/* throw_if_error= */ false); } } @@ -161,7 +161,7 @@ void RestorerFromBackup::run(Mode mode) setStage(Stage::COMPLETED); } -void RestorerFromBackup::waitFutures() +void RestorerFromBackup::waitFutures(bool throw_if_error) { std::exception_ptr error; @@ -176,11 +176,7 @@ void RestorerFromBackup::waitFutures() if (futures_to_wait.empty()) break; - /// Wait for all tasks. 
- for (auto & future : futures_to_wait) - future.wait(); - - /// Check if there is an exception. + /// Wait for all tasks to finish. for (auto & future : futures_to_wait) { try @@ -197,7 +193,12 @@ void RestorerFromBackup::waitFutures() } if (error) - std::rethrow_exception(error); + { + if (throw_if_error) + std::rethrow_exception(error); + else + tryLogException(error, log); + } } size_t RestorerFromBackup::getNumFutures() const diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 7b36eea0ba0..6dbca5bced5 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -130,7 +130,7 @@ private: /// Waits until all tasks are processed (including the tasks scheduled while we're waiting). /// Throws an exception if any of the tasks throws an exception. - void waitFutures(); + void waitFutures(bool throw_if_error = true); /// Throws an exception if the RESTORE query was cancelled. void checkIsQueryCancelled() const; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 78482391df8..8d78c340626 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2955,7 +2955,8 @@ void ClientBase::init(int argc, char ** argv) /// Common options for clickhouse-client and clickhouse-local. options_description.main_description->add_options() - ("help", "produce help message") + ("help", "print usage summary, combine with --verbose to display all options") + ("verbose", "print query and other debugging info") ("version,V", "print version information and exit") ("version-clean", "print version in machine-readable format and exit") @@ -2979,7 +2980,6 @@ void ClientBase::init(int argc, char ** argv) ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)") ("echo", "in batch mode, print query before execution") - ("verbose", "print query and other debugging info") ("log-level", po::value(), "log level") ("server_logs_file", po::value(), "put server logs into specified file") @@ -3008,6 +3008,8 @@ void ClientBase::init(int argc, char ** argv) addOptions(options_description); + OptionsDescription options_description_non_verbose = options_description; + auto getter = [](const auto & op) { String op_long_name = op->long_name(); @@ -3042,11 +3044,17 @@ void ClientBase::init(int argc, char ** argv) exit(0); // NOLINT(concurrency-mt-unsafe) } + if (options.count("verbose")) + config().setBool("verbose", true); + /// Output of help message. if (options.count("help") || (options.count("host") && options["host"].as() == "elp")) /// If user writes -help instead of --help. 
{ - printHelpMessage(options_description); + if (config().getBool("verbose", false)) + printHelpMessage(options_description, true); + else + printHelpMessage(options_description_non_verbose, false); exit(0); // NOLINT(concurrency-mt-unsafe) } @@ -3113,8 +3121,6 @@ void ClientBase::init(int argc, char ** argv) config().setBool("highlight", options["highlight"].as()); if (options.count("history_file")) config().setString("history_file", options["history_file"].as()); - if (options.count("verbose")) - config().setBool("verbose", true); if (options.count("interactive")) config().setBool("interactive", true); if (options.count("pager")) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9ee77122e2e..64cbdbe8989 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -121,7 +121,7 @@ protected: }; virtual void updateLoggerLevel(const String &) {} - virtual void printHelpMessage(const OptionsDescription & options_description) = 0; + virtual void printHelpMessage(const OptionsDescription & options_description, bool verbose) = 0; virtual void addOptions(OptionsDescription & options_description) = 0; virtual void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index ebfa2e89ea1..736967f4a68 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -28,28 +28,6 @@ namespace ErrorCodes extern const int USER_SESSION_LIMIT_EXCEEDED; } -Suggest::Suggest() -{ - /// Keywords may be not up to date with ClickHouse parser. - addWords({"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", - "CLUSTER", "DEFAULT", "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", - "SETTINGS", "ATTACH", "DETACH", "DROP", "RENAME", "TO", "ALTER", "ADD", - "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", "PRIMARY", "KEY", - "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", - "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", - "THEN", "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", - "FINAL", "DEDUPLICATE", "INSERT", "VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY", - "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "LEFT", "RIGHT", - "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", - "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", - "OR", "ASC", "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", - "BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW", - "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST", - "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", "INTERVAL", - "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND", - "IGNORE NULLS", "RESPECT NULLS", "OVER", "PASTE", "WINDOW", "QUALIFY"}); -} - static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion) { /// NOTE: Once you will update the completion list, @@ -82,6 +60,7 @@ static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggesti add_column("name", "data_type_families", false, {}); add_column("name", "merge_tree_settings", false, {}); add_column("name", "settings", false, {}); + add_column("keyword", "keywords", false, {}); if (!basic_suggestion) { diff --git a/src/Client/Suggest.h b/src/Client/Suggest.h index aac8a73f702..d04910f4d00 100644 --- a/src/Client/Suggest.h +++ b/src/Client/Suggest.h @@ -17,7 +17,7 @@ namespace DB 
class Suggest : public LineReader::Suggest, boost::noncopyable { public: - Suggest(); + Suggest() = default; ~Suggest() { diff --git a/src/Compression/CompressionCodecGCD.cpp b/src/Compression/CompressionCodecGCD.cpp index 8b923d61650..59e11f896f0 100644 --- a/src/Compression/CompressionCodecGCD.cpp +++ b/src/Compression/CompressionCodecGCD.cpp @@ -24,6 +24,7 @@ public: void updateHash(SipHash & hash) const override; protected: + /// 1 byte (`gcd_bytes_size` value) + 1 byte (`bytes_to_skip` value) + `bytes_to_skip` bytes (trash) + `gcd_bytes_size` bytes (gcd value) + (`source_size` - `bytes_to_skip`) bytes (data) UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; @@ -54,7 +55,7 @@ UInt32 CompressionCodecGCD::getMaxCompressedDataSize(UInt32 uncompressed_size) c { return uncompressed_size + gcd_bytes_size // To store gcd - + 2; // Local header + + 2; // Values of `gcd_bytes_size` and `bytes_to_skip` } uint8_t CompressionCodecGCD::getMethodByte() const @@ -147,7 +148,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, if (source_size - sizeof(T) != output_size) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data"); - memcpy(dest, source, source_size); + memcpy(dest, source, source_size - sizeof(T)); return; } @@ -160,6 +161,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, source += sizeof(T); dest += sizeof(T); } + chassert(source == source_end); } } @@ -209,6 +211,8 @@ void CompressionCodecGCD::doDecompressData(const char * source, UInt32 source_si throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress GCD-encoded data. File has wrong header"); UInt8 bytes_to_skip = uncompressed_size % bytes_size; + chassert(bytes_to_skip == static_cast(source[1])); + UInt32 output_size = uncompressed_size - bytes_to_skip; if (static_cast(2 + bytes_to_skip) > source_size) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index fecd3c6bdc5..ed64a30b115 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -112,6 +112,7 @@ class IColumn; M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \ M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ + M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. 
While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ @@ -604,7 +605,6 @@ class IColumn; M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ - M(Bool, optimize_monotonous_functions_in_order_by, false, "Replace monotonous function with its argument in ORDER BY", 0) \ M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ @@ -707,7 +707,6 @@ class IColumn; M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ M(Bool, enable_url_encoding, true, " Allows to enable/disable decoding/encoding path in uri in URL table engine", 0) \ - M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \ M(UInt64, max_distributed_depth, 5, "Maximum distributed query depth", 0) \ @@ -934,6 +933,7 @@ class IColumn; MAKE_OBSOLETE(M, Bool, allow_experimental_query_cache, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_alter_materialized_view_structure, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_shared_merge_tree, true) \ + MAKE_OBSOLETE(M, Bool, allow_experimental_database_replicated, true) \ \ MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \ MAKE_OBSOLETE(M, StreamingHandleErrorMode, handle_kafka_error_mode, StreamingHandleErrorMode::DEFAULT) \ @@ -978,6 +978,7 @@ class IColumn; MAKE_OBSOLETE(M, Bool, allow_experimental_undrop_table_query, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_s3queue, true) \ MAKE_OBSOLETE(M, Bool, query_plan_optimize_primary_key, true) \ + MAKE_OBSOLETE(M, Bool, optimize_monotonous_functions_in_order_by, false) \ /** The section above is for obsolete settings. Do not add anything there. */ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 5d63a07fe58..eb81800367f 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -92,9 +92,11 @@ static std::map sett {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, {"first_day_of_week", "Monday", "Monday", "Added a setting for the first day of the week for date/time functions"}, + {"allow_experimental_database_replicated", false, true, "Database engine Replicated is now in Beta stage"}, {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, - {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."} - }}, + {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, + {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, + }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 57dbcad565f..934b7cc5e53 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1139,8 +1139,10 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep } LOG_INFO(log, "All tables are created successfully"); - chassert(max_log_ptr_at_creation || our_log_ptr); UInt32 first_entry_to_mark_finished = new_replica ? max_log_ptr_at_creation : our_log_ptr; + /// NOTE first_entry_to_mark_finished can be 0 if our replica has crashed just after creating its nodes in ZK, + /// so it's a new replica, but after restarting we don't know max_log_ptr_at_creation anymore... + /// It's a very rare case, and it's okay if some queries throw TIMEOUT_EXCEEDED when waiting for all replicas if (first_entry_to_mark_finished) { /// If the replica is new and some of the queries applied during recovery diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index 066acc250a2..14d5f94ef46 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -154,6 +154,7 @@ void IDisk::copyThroughBuffers( /// Disable parallel write. We already copy in parallel. /// Avoid high memory usage. 
See test_s3_zero_copy_ttl/test.py::test_move_and_s3_memory_usage write_settings.s3_allow_parallel_part_upload = false; + write_settings.azure_allow_parallel_part_upload = false; asyncCopy(*this, from_path, *to_disk, to_path, copying_thread_pool, results, copy_root_dir, read_settings, write_settings, cancellation_hook); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index ff4b481eefd..0f45f40288e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -282,12 +282,17 @@ std::unique_ptr AzureObjectStorage::writeObject( /// NO LOG_TEST(log, "Writing file: {}", object.remote_path); + ThreadPoolCallbackRunnerUnsafe scheduler; + if (write_settings.azure_allow_parallel_part_upload) + scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "VFSWrite"); + return std::make_unique( client.get(), object.remote_path, buf_size, patchSettings(write_settings), - settings.get()); + settings.get(), + std::move(scheduler)); } void AzureObjectStorage::removeObjectImpl(const StoredObject & object, const SharedAzureClientPtr & client_ptr, bool if_exists) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index fa5e7f1f51e..ed2ed66b0b0 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1169,7 +1169,10 @@ struct ToStartOfHourImpl struct ToYearImpl { static constexpr auto name = "toYear"; - + static UInt16 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toYear(t); + } static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toYear(t); @@ -1217,7 +1220,10 @@ struct ToWeekYearImpl static constexpr auto name = "toWeekYear"; static constexpr Int8 week_mode = 3; - + static UInt16 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toYearWeek(t, week_mode).first; + } static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toYearWeek(t, week_mode).first; @@ -1241,7 +1247,10 @@ struct ToWeekYearImpl struct ToWeekOfWeekYearImpl { static constexpr auto name = "toWeekOfWeekYear"; - + static UInt16 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toISOWeek(t); + } static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(t); @@ -1265,7 +1274,10 @@ struct ToWeekOfWeekYearImpl struct ToQuarterImpl { static constexpr auto name = "toQuarter"; - + static UInt8 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toQuarter(t); + } static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toQuarter(t); @@ -1290,7 +1302,10 @@ struct ToQuarterImpl struct ToMonthImpl { static constexpr auto name = "toMonth"; - + static UInt8 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toMonth(t); + } static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toMonth(t); @@ -1315,7 +1330,10 @@ struct ToMonthImpl struct ToDayOfMonthImpl { static constexpr auto name = "toDayOfMonth"; - + static UInt8 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toDayOfMonth(t); + } static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfMonth(t); @@ -1341,7 +1359,10 @@ struct ToDayOfWeekImpl { static constexpr auto name = "toDayOfWeek"; static constexpr bool value_may_be_string = true; - + static UInt8 
execute(UInt64 t, UInt8 mode, const DateLUTImpl & time_zone) + { + return time_zone.toDayOfWeek(t, mode); + } static UInt8 execute(Int64 t, UInt8 mode, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t, mode); @@ -1365,7 +1386,10 @@ struct ToDayOfWeekImpl struct ToDayOfYearImpl { static constexpr auto name = "toDayOfYear"; - + static UInt16 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toDayOfYear(t); + } static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfYear(t); @@ -1421,7 +1445,10 @@ public: struct ToHourImpl { static constexpr auto name = "toHour"; - + static UInt8 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toHour(t); + } static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toHour(t); @@ -1446,7 +1473,10 @@ struct ToHourImpl struct TimezoneOffsetImpl { static constexpr auto name = "timezoneOffset"; - + static time_t execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.timezoneOffset(t); + } static time_t execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.timezoneOffset(t); @@ -1474,7 +1504,10 @@ struct TimezoneOffsetImpl struct ToMinuteImpl { static constexpr auto name = "toMinute"; - + static UInt8 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toMinute(t); + } static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toMinute(t); @@ -1499,7 +1532,10 @@ struct ToMinuteImpl struct ToSecondImpl { static constexpr auto name = "toSecond"; - + static UInt8 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toSecond(t); + } static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toSecond(t); @@ -1550,7 +1586,10 @@ struct ToMillisecondImpl struct ToISOYearImpl { static constexpr auto name = "toISOYear"; - + static UInt16 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toISOYear(time_zone.toDayNum(t)); + } static UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toISOYear(time_zone.toDayNum(t)); @@ -1607,7 +1646,10 @@ struct ToStartOfISOYearImpl struct ToISOWeekImpl { static constexpr auto name = "toISOWeek"; - + static UInt8 execute(UInt64 t, const DateLUTImpl & time_zone) + { + return time_zone.toISOWeek(time_zone.toDayNum(t)); + } static UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toISOWeek(time_zone.toDayNum(t)); diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 87438365901..03fcff3af12 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -62,8 +62,8 @@ template <> struct InstructionValueTypeMap { using Instructi template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; -template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; -template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = Int64; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt64; }; template <> struct InstructionValueTypeMap { using InstructionValueType = UInt16; }; template <> struct InstructionValueTypeMap { using InstructionValueType = Int32; }; template <> struct 
InstructionValueTypeMap { using InstructionValueType = UInt32; }; @@ -1017,7 +1017,7 @@ public: else { for (auto & instruction : instructions) - instruction.perform(pos, static_cast(vec[i]), 0, 0, *time_zone); + instruction.perform(pos, static_cast(vec[i]), 0, 0, *time_zone); } *pos++ = '\0'; @@ -1073,7 +1073,7 @@ public: { /// DateTime/DateTime64 --> insert instruction /// Other types cannot provide the requested data --> write out template - if constexpr (is_any_of) + if constexpr (is_any_of) { Instruction instruction; instruction.setMysqlFunc(std::move(func)); @@ -1539,7 +1539,7 @@ public: /// If the argument was DateTime, add instruction for printing. If it was date, just append default literal auto add_instruction = [&]([[maybe_unused]] typename Instruction::FuncJoda && func, [[maybe_unused]] const String & default_literal) { - if constexpr (is_any_of) + if constexpr (is_any_of) { Instruction instruction; instruction.setJodaFunc(std::move(func)); diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index 7d36677b468..4da3806e51d 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -23,6 +23,7 @@ struct WriteSettings size_t filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = 1000; bool s3_allow_parallel_part_upload = true; + bool azure_allow_parallel_part_upload = true; /// Monitoring bool for_object_storage = false; // to choose which profile events should be incremented diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index b21ccd5b234..c832473c4cd 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -118,7 +118,7 @@ LockedKeyPtr KeyMetadata::lockNoStateCheck() return std::make_unique(shared_from_this()); } -bool KeyMetadata::createBaseDirectory() +bool KeyMetadata::createBaseDirectory(bool throw_if_failed) { if (!created_base_directory.exchange(true)) { @@ -131,7 +131,7 @@ bool KeyMetadata::createBaseDirectory() { created_base_directory = false; - if (e.code() == std::errc::no_space_on_device) + if (!throw_if_failed && e.code() == std::errc::no_space_on_device) { LOG_TRACE(cache_metadata->log, "Failed to create base directory for key {}, " "because no space left on device", key); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 1f320f8df5e..31651149998 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -113,7 +113,7 @@ struct KeyMetadata : private std::map, LockedKeyPtr tryLock(); - bool createBaseDirectory(); + bool createBaseDirectory(bool throw_if_failed = false); std::string getPath() const; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 9cbb6794359..2ac38aeeca7 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -54,10 +54,18 @@ WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolderPtr segment void WriteBufferToFileSegment::nextImpl() { auto downloader [[maybe_unused]] = file_segment->getOrSetDownloader(); - chassert(downloader == FileSegment::getCallerId()); + if (downloader != FileSegment::getCallerId()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Failed to set a downloader (current downloader: {}, file segment info: {})", + downloader, file_segment->getInfoForLog()); + } SCOPE_EXIT({ - file_segment->completePartAndResetDownloader(); + if (file_segment->isDownloader()) + 
file_segment->completePartAndResetDownloader(); + else + chassert(false); }); size_t bytes_to_write = offset(); diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 07ef7aa6c96..f7727f70ff7 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -245,11 +245,15 @@ void executeQuery( const auto & shard_info = cluster->getShardsInfo()[i]; auto query_for_shard = query_info.query_tree->clone(); - if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + if (sharding_key_expr && + query_info.optimized_cluster && + settings.optimize_skip_unused_shards_rewrite_in && + shards > 1 && + /// TODO: support composite sharding key + sharding_key_expr->getRequiredColumns().size() == 1) { OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ sharding_key_expr, - sharding_key_expr->getSampleBlock().getByPosition(0).type, sharding_key_column_name, shard_info, not_optimized_cluster->getSlotToShard(), @@ -282,11 +286,15 @@ void executeQuery( const auto & shard_info = cluster->getShardsInfo()[i]; ASTPtr query_ast_for_shard = query_info.query->clone(); - if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + if (sharding_key_expr && + query_info.optimized_cluster && + settings.optimize_skip_unused_shards_rewrite_in && + shards > 1 && + /// TODO: support composite sharding key + sharding_key_expr->getRequiredColumns().size() == 1) { OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ sharding_key_expr, - sharding_key_expr->getSampleBlock().getByPosition(0).type, sharding_key_column_name, shard_info, not_optimized_cluster->getSlotToShard(), diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 04b761e8b2b..0b63904c3e5 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5256,6 +5256,7 @@ WriteSettings Context::getWriteSettings() const res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload; + res.azure_allow_parallel_part_upload = settings.azure_allow_parallel_part_upload; res.remote_throttler = getRemoteWriteThrottler(); res.local_throttler = getLocalWriteThrottler(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 1f38449fb32..7cda0267fdf 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -258,15 +258,6 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) "Enable allow_experimental_database_materialized_mysql to use it"); } - if (create.storage->engine->name == "Replicated" - && !getContext()->getSettingsRef().allow_experimental_database_replicated - && !internal && !create.attach) - { - throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, - "Replicated is an experimental database engine. 
" - "Enable allow_experimental_database_replicated to use it"); - } - if (create.storage->engine->name == "MaterializedPostgreSQL" && !getContext()->getSettingsRef().allow_experimental_database_materialized_postgresql && !internal && !create.attach) diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 84279282d92..54515ea072a 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -38,25 +38,27 @@ Field executeFunctionOnField( return (*ret.column)[0]; } -/// @param sharding_column_value - one of values from IN +/// @param column_value - one of values from IN /// @param sharding_column_name - name of that column /// @return true if shard may contain such value (or it is unknown), otherwise false. bool shardContains( - Field sharding_column_value, + Field column_value, const std::string & sharding_column_name, const OptimizeShardingKeyRewriteInMatcher::Data & data) { + /// Type of column in storage (used for implicit conversion from i.e. String to Int) + const DataTypePtr & column_type = data.sharding_key_expr->getSampleBlock().getByName(sharding_column_name).type; /// Implicit conversion. - sharding_column_value = convertFieldToType(sharding_column_value, *data.sharding_key_type); + column_value = convertFieldToType(column_value, *column_type); /// NULL is not allowed in sharding key, /// so it should be safe to assume that shard cannot contain it. - if (sharding_column_value.isNull()) + if (column_value.isNull()) return false; Field sharding_value = executeFunctionOnField( - sharding_column_value, sharding_column_name, - data.sharding_key_expr, data.sharding_key_type, + column_value, sharding_column_name, + data.sharding_key_expr, column_type, data.sharding_key_column_name); /// The value from IN can be non-numeric, /// but in this case it should be convertible to numeric type, let's try. diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h index 1b2d5a8fc15..78e4f42a912 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h @@ -28,8 +28,6 @@ struct OptimizeShardingKeyRewriteInMatcher { /// Expression of sharding_key for the Distributed() table const ExpressionActionsPtr & sharding_key_expr; - /// Type of sharding_key column. 
- const DataTypePtr & sharding_key_type; /// Name of the column for sharding_expr const std::string & sharding_key_column_name; /// Info for the current shard (to compare shard_num with calculated) diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 4a30c0ae726..26a78d53aab 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -111,7 +111,7 @@ FileSegmentsHolderPtr TemporaryDataOnDisk::createCacheFile(size_t max_file_size) CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true), FileCache::getCommonUser()); chassert(holder->size() == 1); - holder->back().getKeyMetadata()->createBaseDirectory(); + holder->back().getKeyMetadata()->createBaseDirectory(/* throw_if_failed */true); return holder; } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 7b979088170..feb417e0c03 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -368,92 +368,6 @@ std::unordered_set getDistinctNames(const ASTSelectQuery & select) return names; } -/// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression, -/// has a single argument and not an aggregate functions. -void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context, - const TablesWithColumns & tables_with_columns, - const TreeRewriterResult & result) -{ - auto order_by = select_query->orderBy(); - if (!order_by) - return; - - /// Do not apply optimization for Distributed and Merge storages, - /// because we can't get the sorting key of their underlying tables - /// and we can break the matching of the sorting key for `read_in_order` - /// optimization by removing monotonous functions from the prefix of key. - if (result.is_remote_storage || (result.storage && result.storage->getName() == "Merge")) - return; - - for (const auto & child : order_by->children) - { - auto * order_by_element = child->as(); - - if (!order_by_element || order_by_element->children.empty()) - throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST"); - - if (order_by_element->with_fill) - return; - } - - std::unordered_set group_by_hashes; - if (auto group_by = select_query->groupBy()) - { - if (select_query->group_by_with_grouping_sets) - { - for (auto & set : group_by->children) - { - for (auto & elem : set->children) - { - const auto hash = elem->getTreeHash(/*ignore_aliases=*/ true); - const auto key = toString(hash); - group_by_hashes.insert(key); - } - } - } - else - { - for (auto & elem : group_by->children) - { - const auto hash = elem->getTreeHash(/*ignore_aliases=*/ true); - const auto key = toString(hash); - group_by_hashes.insert(key); - } - } - } - - auto sorting_key_columns = result.storage_snapshot ? result.storage_snapshot->metadata->getSortingKeyColumns() : Names{}; - - bool is_sorting_key_prefix = true; - for (size_t i = 0; i < order_by->children.size(); ++i) - { - auto * order_by_element = order_by->children[i]->as(); - - auto & ast_func = order_by_element->children[0]; - if (!ast_func->as()) - continue; - - if (i >= sorting_key_columns.size() || ast_func->getColumnName() != sorting_key_columns[i]) - is_sorting_key_prefix = false; - - /// If order by expression matches the sorting key, do not remove - /// functions to allow execute reading in order of key. 
- if (is_sorting_key_prefix) - continue; - - MonotonicityCheckVisitor::Data data{tables_with_columns, context, group_by_hashes}; - MonotonicityCheckVisitor(data).visit(ast_func); - - if (!data.isRejected()) - { - ast_func = data.identifier->clone(); - ast_func->setAlias(""); - if (!data.monotonicity.is_positive) - order_by_element->direction *= -1; - } - } -} - /// If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x. /// Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y /// in case if f(), g(), h(), t() are deterministic (in scope of query). @@ -789,10 +703,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (settings.optimize_redundant_functions_in_order_by) optimizeRedundantFunctionsInOrderBy(select_query, context); - /// Replace monotonous functions with its argument - if (settings.optimize_monotonous_functions_in_order_by) - optimizeMonotonousFunctionsInOrderBy(select_query, context, tables_with_columns, result); - /// Remove duplicate items from ORDER BY. /// Execute it after all order by optimizations, /// because they can produce duplicated columns. diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index a483ac92f39..d6593aa3365 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -357,6 +357,16 @@ bool ParserFilterClause::parseImpl(Pos & pos, ASTPtr & node, Expected & expected return false; } + if (function.name == "count") + { + /// Remove child from function.arguments if it's '*' because countIf(*) is not supported. + /// See https://github.com/ClickHouse/ClickHouse/issues/61004 + std::erase_if(function.arguments->children, [] (const ASTPtr & child) + { + return typeid_cast(child.get()) || typeid_cast(child.get()); + }); + } + function.name += "If"; function.arguments->children.push_back(condition->children[0]); return true; diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index 8012dbb37c6..2ed551851e8 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -11,6 +11,12 @@ #include #include +#include +#include +#include + +#include + namespace DB { @@ -24,413 +30,54 @@ namespace ErrorCodes namespace { -const std::unordered_set keywords +const std::unordered_set & getObfuscateKeywords() { - "!=", - "", - "%", - "*", - "+", - "-", - "->", - ".", - "/", - ":", - "::", - "<", - "<=", - "<>", - "=", - "==", - "<=>", - ">", - ">=", - "?", - "[", - "]+", - "]+|[", - "^[", - "||", - "]+$", - "ACCESS", - "ACTION", - "ADD", - "ADMIN", - "AFTER", - "ALGORITHM", - "ALIAS", - "ALL", - "ALLOWED_LATENESS", - "ALTER", - "AND", - "ANTI", - "ANY", - "APPLY", - "ARRAY", - "AS", - "ASC", - "ASCENDING", - "ASOF", - "ASSUME", - "AST", - "ASYNC", - "ATTACH", - "AUTO_INCREMENT", - "BACKUP", - "BASE_BACKUP", - "BEGIN", - "BETWEEN", - "BIDIRECTIONAL", - "BOTH", - "BY", - "CACHE", - "CACHES", - "CASCADE", - "CASE", - "CASEWITHEXPRESSION", - "CAST", - "CHANGE", - "CHANGEABLE_IN_READONLY", - "CHANGED", - "CHAR", - "CHARACTER", - "CHECK", - "CLEANUP", - "CLEAR", - "CLUSTER", - "CLUSTER_HOST_IDS", - "CLUSTERS", - "CN", - "CODEC", - "COLLATE", - "COLLECTION", - "COLUMN", - "COLUMNS", - "COMMENT", - "COMMIT", - "COMPRESSION", - "CONCAT", - "CONSTRAINT", - "CREATE", - "CROSS", - "CUBE", - "CURRENT", - "CURRENT_USER", - "DATABASE", - "DATABASES", - "DATE", - "DATE_ADD", - "DATEADD", - "DATE_DIFF", - "DATEDIFF", - "DATE_SUB", 
- "DATESUB", - "DAY", - "DD", - "DDL", - "DEDUPLICATE", - "DEFAULT", - "DELAY", - "DELETE", - "DESC", - "DESCENDING", - "DESCRIBE", - "DETACH", - "DETACHED", - "DICTIONARIES", - "DICTIONARY", - "DISK", - "DISTINCT", - "DIV", - "DOUBLE_SHA1_HASH", - "DROP", - "ELSE", - "EMPTY", - "ENABLED", - "END", - "ENFORCED", - "ENGINE", - "EPHEMERAL", - "EQUALS", - "ESTIMATE", - "EVENT", - "EVENTS", - "EXCEPT", - "EXCHANGE", - "EXISTS", - "EXPLAIN", - "EXPRESSION", - "EXTERNAL", - "EXTRACT", - "FALSE", - "FETCH", - "FILE", - "FILESYSTEM", - "FILL", - "FILTER", - "FINAL", - "FIRST", - "FOLLOWING", - "FOR", - "FOREIGN", - "FORMAT", - "FREEZE", - "FROM", - "FULL", - "FULLTEXT", - "FUNCTION", - "GLOBAL", - "GRANT", - "GRANTEES", - "GRANTS", - "GRANULARITY", - "GREATER", - "GREATEROREQUALS", - "GROUP", - "GROUPING", - "GROUPS", - "HASH", - "HAVING", - "HDFS", - "HH", - "HIERARCHICAL", - "HOST", - "HOUR", - "ID", - "IDENTIFIED", - "IF", - "ILIKE", - "IN", - "INDEX", - "INFILE", - "INHERIT", - "INJECTIVE", - "INNER", - "INSERT", - "INTERPOLATE", - "INTERSECT", - "INTERVAL", - "INTO", - "INVISIBLE", - "IP", - "IS", - "IS_OBJECT_ID", - "JOIN", - "KEY", - "KEYED", - "KILL", - "LAMBDA", - "LARGE", - "LAST", - "LAYOUT", - "LEADING", - "LEFT", - "LESS", - "LESSOREQUALS", - "LEVEL", - "LIFETIME", - "LIKE", - "LIMIT", - "LIMITS", - "LINEAR", - "LIST", - "LITERAL", - "LIVE", - "LOCAL", - "LTRIM", - "MATCH", - "MATERIALIZE", - "MATERIALIZED", - "MAX", - "MCS", - "MEMORY", - "MI", - "MICROSECOND", - "MILLISECOND", - "MIN", - "MINUS", - "MINUTE", - "MM", - "MOD", - "MODIFY", - "MONTH", - "MOVE", - "MS", - "MULTIIF", - "MUTATION", - "NAME", - "NAMED", - "NANOSECOND", - "NEXT", - "NO", - "NONE", - "NOT", - "NOTEQUALS", - "NOTIN", - "NS", - "NULL", - "NULLS", - "OBJECT", - "OFFSET", - "ON", - "ONLY", - "OPTIMIZE", - "OPTION", - "OR", - "ORDER", - "OUTER", - "OUTFILE", - "OVER", - "OVERRIDE", - "PART", - "PARTIAL", - "PARTITION", - "PARTITIONS", - "PART_MOVE_TO_SHARD", - "PERMANENTLY", - "PERMISSIVE", - "PIPELINE", - "PLAN", - "PLUS", - "POLICY", - "POPULATE", - "POSITION", - "PRECEDING", - "PRECISION", - "PREWHERE", - "PRIMARY", - "PRIVILEGES", - "PROCESSLIST", - "PROFILE", - "PROJECTION", - "QQ", - "QUARTER", - "QUERY", - "QUOTA", - "RANDOMIZED", - "RANGE", - "READONLY", - "REALM", - "RECOMPRESS", - "REFERENCES", - "REFRESH", - "REGEXP", - "REGEXPQUOTEMETA", - "REMOVE", - "RENAME", - "REPLACE", - "REPLACEREGEXPALL", - "REPLACEREGEXPONE", - "RESET", - "RESTORE", - "RESTRICT", - "RESTRICTIVE", - "RESUME", - "REVOKE", - "RIGHT", - "ROLE", - "ROLES", - "ROLLBACK", - "ROLLUP", - "ROW", - "ROWS", - "RTRIM", - "S3", - "SALT", - "SAMPLE", - "SECOND", - "SELECT", - "SEMI", - "SERVER", - "SET", - "SETS", - "SETTING", - "SETTINGS", - "SHA256_HASH", - "SHARD", - "SHOW", - "SIGNED", - "SIMPLE", - "SINGLEVALUEORNULL", - "SNAPSHOT", - "SOURCE", - "SPATIAL", - "SS", - "STDOUT", - "STEP", - "STORAGE", - "STRICT", - "STRICTLY_ASCENDING", - "SUBPARTITION", - "SUBPARTITIONS", - "SUBSTRING", - "SUSPEND", - "SYNC", - "SYNTAX", - "SYSTEM", - "TABLE", - "TABLES", - "TEMPORARY", - "TEST", - "THAN", - "THEN", - "TIES", - "TIMESTAMP", - "TIMESTAMP_ADD", - "TIMESTAMPADD", - "TIMESTAMP_DIFF", - "TIMESTAMPDIFF", - "TIMESTAMP_SUB", - "TIMESTAMPSUB", - "TO", - "TODATE", - "TODATETIME", - "TOP", - "TOTALS", - "TRACKING", - "TRAILING", - "TRANSACTION", - "TREE", - "TRIGGER", - "TRIM", - "TRIMBOTH", - "TRIMLEFT", - "TRIMRIGHT", - "TRUE", - "TRUNCATE", - "TTL", - "TUPLE", - "TYPE", - "UNBOUNDED", - "UNFREEZE", - "UNION", - "UNIQUE", - "UNSIGNED", - 
"UNTUPLE", - "UPDATE", - "URL", - "USE", - "USER", - "USING", - "UUID", - "VALUES", - "VARYING", - "VIEW", - "VIEWIFPERMITTED", - "VISIBLE", - "VOLUME", - "WATCH", - "WATERMARK", - "WEEK", - "WHEN", - "WHERE", - "WINDOW", - "WITH", - "WK", - "WRITABLE", - "YEAR", - "YYYY", - "ZKPATH" + auto initialize = []() + { + std::unordered_set instance = { + "!=", + "", + "%", + "*", + "+", + "-", + "->", + ".", + "/", + ":", + "::", + "<", + "<=", + "<>", + "=", + "==", + "<=>", + ">", + ">=", + "?", + "[", + "]+", + "]+|[", + "^[", + "||", + "]+$" + }; + + for (const auto & keyword : getAllKeyWords()) + { + /// The keyword may consist of several tokens (ORDER BY or GROUP BY) + /// We will split them and add separately. + std::vector tokens; + boost::split(tokens, keyword, [](char c) { return c == ' '; }); + for (const auto & token : tokens) + instance.insert(token); + } + + return instance; + }; + + static std::unordered_set instance = initialize(); + return instance; }; /// We want to keep some words inside quotes. For example we want to keep HOUR inside: @@ -1309,11 +956,9 @@ void obfuscateQueries( if (token.type == TokenType::BareWord) { - std::string whole_token_uppercase(whole_token); - Poco::toUpperInPlace(whole_token_uppercase); + auto whole_token_uppercase = Poco::toUpper(toString(whole_token)); - if (keywords.contains(whole_token_uppercase) - || known_identifier_func(whole_token)) + if (getObfuscateKeywords().contains(whole_token_uppercase) || known_identifier_func(whole_token)) { /// Keep keywords as is. result.write(token.begin, token.size()); diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 72135157117..394bb4722f8 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1,6 +1,7 @@ #include "GRPCServer.h" #include #include +#include #if USE_GRPC #include @@ -320,8 +321,27 @@ namespace Poco::Net::SocketAddress getClientAddress() const { - String peer = grpc_context.peer(); - return Poco::Net::SocketAddress{peer.substr(peer.find(':') + 1)}; + /// Returns a string like ipv4:127.0.0.1:55930 or ipv6:%5B::1%5D:55930 + String uri_encoded_peer = grpc_context.peer(); + + constexpr const std::string_view ipv4_prefix = "ipv4:"; + constexpr const std::string_view ipv6_prefix = "ipv6:"; + + bool ipv4 = uri_encoded_peer.starts_with(ipv4_prefix); + bool ipv6 = uri_encoded_peer.starts_with(ipv6_prefix); + + if (!ipv4 && !ipv6) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ipv4 or ipv6 protocol in peer address, got {}", uri_encoded_peer); + + auto prefix = ipv4 ? ipv4_prefix : ipv6_prefix; + auto family = ipv4 ? Poco::Net::AddressFamily::Family::IPv4 : Poco::Net::AddressFamily::Family::IPv6; + + uri_encoded_peer= uri_encoded_peer.substr(prefix.length()); + + String peer; + Poco::URI::decode(uri_encoded_peer, peer); + + return Poco::Net::SocketAddress{family, peer}; } std::optional getClientHeader(const String & key) const diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 81db7c04eb3..4d29e4d6a87 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -364,12 +364,12 @@ bool HTTPHandler::authenticateUser( /// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name /// extracted from the SSL certificate used for this connection instead of checking password. 
bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on"); - bool has_auth_headers = !user.empty() || !password.empty() || !quota_key.empty() || has_ssl_certificate_auth; + bool has_auth_headers = !user.empty() || !password.empty() || has_ssl_certificate_auth; /// User name and password can be passed using HTTP Basic auth or query parameters /// (both methods are insecure). bool has_http_credentials = request.hasCredentials(); - bool has_credentials_in_query_params = params.has("user") || params.has("password") || params.has("quota_key"); + bool has_credentials_in_query_params = params.has("user") || params.has("password"); std::string spnego_challenge; std::string certificate_common_name; @@ -435,15 +435,12 @@ bool HTTPHandler::authenticateUser( { throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); } - - quota_key = params.get("quota_key", ""); } else { /// If the user name is not set we assume it's the 'default' user. user = params.get("user", "default"); password = params.get("password", ""); - quota_key = params.get("quota_key", ""); } if (!certificate_common_name.empty()) @@ -495,6 +492,16 @@ bool HTTPHandler::authenticateUser( basic_credentials->setPassword(password); } + if (params.has("quota_key")) + { + if (!quota_key.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid authentication: it is not allowed " + "to use quota key as HTTP header and as parameter simultaneously"); + + quota_key = params.get("quota_key"); + } + /// Set client info. It will be used for quota accounting parameters in 'setUser' method. session->setHTTPClientInfo(request); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 4e3d6ab69f6..c5b50f7090d 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -793,6 +793,8 @@ bool TCPHandler::readDataNext() /// We accept and process data. read_ok = receivePacket(); + /// Reset the timeout on Ping packet (NOTE: there is no Ping for INSERT queries yet). 
+ watch.restart(); break; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index fcab606130d..814db3172b4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8371,4 +8371,29 @@ bool MergeTreeData::initializeDiskOnConfigChange(const std::set & new_ad } return true; } + +bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove) +{ + for (const auto & command : commands) + { + if (AlterConversions::supportsMutationCommandType(command.type)) + { + if (remove) + { + --alter_conversions_mutations; + if (alter_conversions_mutations < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly mutations counter is negative ({})", alter_conversions_mutations); + } + else + { + if (alter_conversions_mutations < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "On-fly mutations counter is negative ({})", alter_conversions_mutations); + ++alter_conversions_mutations; + } + return true; + } + } + return false; +} + } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 3b7ff1454a5..4ed2e884990 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1708,4 +1708,8 @@ struct CurrentlySubmergingEmergingTagger || (settings.min_compressed_bytes_to_fsync_after_merge && input_bytes >= settings.min_compressed_bytes_to_fsync_after_merge)); } +/// Look at MutationCommands if it contains mutations for AlterConversions, update the counter. +/// Return true if the counter had been updated +bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove); + } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index af042ced5a0..d6c36d12bf5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -6,8 +6,11 @@ #include #include #include +#include #include #include +#include +#include #include #include #include @@ -942,7 +945,14 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper mutations_by_partition.erase(partition_and_block_num.first); } - it = mutations_by_znode.erase(it); + if (!it->second.is_done) + { + const auto commands = entry.commands; + it = mutations_by_znode.erase(it); + updateAlterConversionsMutations(commands, alter_conversions_mutations, /* remove= */ true); + } + else + it = mutations_by_znode.erase(it); } else ++it; @@ -991,12 +1001,15 @@ int32_t ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper auto & mutation = mutations_by_znode.emplace(entry->znode_name, MutationStatus(entry, format_version)) .first->second; - for (const auto & pair : entry->block_numbers) - { - const String & partition_id = pair.first; - Int64 block_num = pair.second; - mutations_by_partition[partition_id].emplace(block_num, &mutation); - } + updateAlterConversionsMutations(entry->commands, alter_conversions_mutations, /* remove= */ false); + NOEXCEPT_SCOPE({ + for (const auto & pair : entry->block_numbers) + { + const String & partition_id = pair.first; + Int64 block_num = pair.second; + mutations_by_partition[partition_id].emplace(block_num, &mutation); + } + }); LOG_TRACE(log, "Adding mutation {} for {} partitions (data versions: {})", entry->znode_name, entry->block_numbers.size(), entry->getBlockNumbersForLogs()); @@ 
-1062,6 +1075,8 @@ ReplicatedMergeTreeMutationEntryPtr ReplicatedMergeTreeQueue::removeMutation( } mutations_by_znode.erase(it); + /// updateAlterConversionsMutations() will be called in updateMutations() + LOG_DEBUG(log, "Removed mutation {} from local state.", entry->znode_name); } @@ -1887,6 +1902,15 @@ ReplicatedMergeTreeMergePredicate ReplicatedMergeTreeQueue::getMergePredicate(zk MutationCommands ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const { + int32_t part_metadata_version = part->getMetadataVersion(); + int32_t metadata_version = storage.getInMemoryMetadataPtr()->getMetadataVersion(); + + chassert(alter_conversions_mutations >= 0); + /// NOTE: that just checking part_metadata_version is not enough, since we + /// need to check for non-metadata mutations as well. + if (alter_conversions_mutations == 0 && metadata_version == part_metadata_version) + return {}; + std::unique_lock lock(state_mutex); auto in_partition = mutations_by_partition.find(part->info.partition_id); @@ -1894,13 +1918,18 @@ MutationCommands ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const return {}; Int64 part_data_version = part->info.getDataVersion(); - Int64 part_metadata_version = part->getMetadataVersion(); - MutationCommands result; bool seen_all_data_mutations = false; bool seen_all_metadata_mutations = false; + auto add_to_result = [&](const ReplicatedMergeTreeMutationEntryPtr & entry) + { + for (const auto & command : entry->commands | std::views::reverse) + if (AlterConversions::supportsMutationCommandType(command.type)) + result.emplace_back(command); + }; + /// Here we return mutation commands for part which has bigger alter version than part metadata version. /// Please note, we don't use getDataVersion(). It's because these alter commands are used for in-fly conversions /// of part's metadata. 
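The hunk above adds a fast path to getAlterMutationCommandsForPart(): an atomic counter of unfinished mutations that need AlterConversions is consulted together with the part's metadata version, so the common case (no pending on-the-fly mutations, metadata already up to date) returns early without taking state_mutex. Below is a minimal, self-contained sketch of that pattern, not the actual ClickHouse code: the class, member, and type names (MutationQueue, Commands, the plain long counter, mutations_by_version) are illustrative stand-ins, and the slow path is deliberately simplified.

#include <atomic>
#include <cassert>
#include <map>
#include <mutex>
#include <string>
#include <vector>

using Commands = std::vector<std::string>;  /// stand-in for MutationCommands

class MutationQueue
{
public:
    /// Unfinished mutations that require on-the-fly (AlterConversions) rewrites.
    /// Incremented when such a mutation is added, decremented when it is finished or killed.
    std::atomic<long> alter_conversions_mutations{0};
    int metadata_version = 0;

    Commands getAlterMutationCommandsForPart(int part_metadata_version) const
    {
        assert(alter_conversions_mutations.load() >= 0);

        /// Fast path: no pending AlterConversions mutations and the part already has
        /// the current metadata version -- skip taking the lock entirely.
        if (alter_conversions_mutations.load() == 0 && metadata_version == part_metadata_version)
            return {};

        std::lock_guard lock(state_mutex);
        Commands result;
        /// Simplified slow path: the real queue walks mutations_by_partition and filters
        /// commands by alter/data version; here we just collect everything newer than the part.
        for (const auto & [version, commands] : mutations_by_version)
            if (version > part_metadata_version)
                result.insert(result.end(), commands.begin(), commands.end());
        return result;
    }

private:
    mutable std::mutex state_mutex;
    std::map<int, Commands> mutations_by_version;
};

int main()
{
    MutationQueue queue;
    return queue.getAlterMutationCommandsForPart(/*part_metadata_version=*/0).empty() ? 0 : 1;
}

The point of the early return is that the check is purely on atomics and already-cached metadata, so the hot read path pays no mutex cost when no on-the-fly conversions exist.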
@@ -1911,28 +1940,22 @@ MutationCommands ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const auto & entry = mutation_status->entry; - auto add_to_result = [&] { - for (const auto & command : entry->commands | std::views::reverse) - if (AlterConversions::supportsMutationCommandType(command.type)) - result.emplace_back(command); - }; - auto alter_version = entry->alter_version; if (alter_version != -1) { - if (alter_version > storage.getInMemoryMetadataPtr()->getMetadataVersion()) + if (alter_version > metadata_version) continue; /// We take commands with bigger metadata version if (alter_version > part_metadata_version) - add_to_result(); + add_to_result(entry); else seen_all_metadata_mutations = true; } else { if (mutation_version > part_data_version) - add_to_result(); + add_to_result(entry); else seen_all_data_mutations = true; } @@ -2019,6 +2042,8 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep "were executed concurrently on different replicas.", znode); mutation.parts_to_do.clear(); } + + updateAlterConversionsMutations(mutation.entry->commands, alter_conversions_mutations, /* remove= */ true); } else if (mutation.parts_to_do.size() == 0) { @@ -2075,6 +2100,7 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep LOG_TRACE(log, "Finishing data alter with version {} for entry {}", entry->alter_version, entry->znode_name); alter_sequence.finishDataAlter(entry->alter_version, lock); } + updateAlterConversionsMutations(entry->commands, alter_conversions_mutations, /* remove= */ true); } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index df4176f5e3d..89ef6240558 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -151,6 +151,8 @@ private: /// Mapping from znode path to Mutations Status std::map mutations_by_znode; + /// Unfinished mutations that is required AlterConversions (see getAlterMutationCommandsForPart()) + std::atomic alter_conversions_mutations = 0; /// Partition -> (block_number -> MutationStatus) std::unordered_map> mutations_by_partition; /// Znode ID of the latest mutation that is done. 
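The StorageMergeTree.cpp hunks that follow maintain the same counter by hand: startMutation() bumps it before inserting into current_mutations_by_version and rolls it back if the insert unexpectedly fails, so the counter never over-counts on the error path. A rough sketch of that increment-then-rollback shape is shown below, under the assumption that every entry qualifies for AlterConversions; MutationEntry, maybeBumpCounter, and the plain std::runtime_error are hypothetical simplifications, not the real ClickHouse API.

#include <atomic>
#include <cassert>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>

struct MutationEntry { std::string commands; };  /// hypothetical stand-in for the real entry type

std::atomic<long> alter_conversions_mutations{0};
std::map<long, MutationEntry> current_mutations_by_version;

/// Returns true if the entry needs AlterConversions and the counter was bumped.
/// (Assumption: every entry qualifies here; the real code inspects the command types.)
bool maybeBumpCounter(const MutationEntry & /*entry*/)
{
    ++alter_conversions_mutations;
    return true;
}

void startMutation(long version, MutationEntry entry)
{
    /// Bump the counter first, then try to register the mutation.
    bool bumped = maybeBumpCounter(entry);
    bool inserted = current_mutations_by_version.try_emplace(version, std::move(entry)).second;
    if (!inserted)
    {
        /// Roll the counter back before reporting the error, so it never over-counts.
        if (bumped)
        {
            --alter_conversions_mutations;
            assert(alter_conversions_mutations.load() >= 0);
        }
        throw std::runtime_error("Mutation " + std::to_string(version) + " already exists");
    }
}

int main()
{
    startMutation(1, MutationEntry{"ALTER ..."});
    try { startMutation(1, MutationEntry{"ALTER ..."}); } catch (const std::runtime_error &) {}
    assert(alter_conversions_mutations.load() == 1);
    return 0;
}

Keeping the decrement inside the failure branch, right before the throw, is what keeps the counter consistent with current_mutations_by_version even when registration fails.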
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 0235a74400c..da36d045fc8 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -521,9 +521,18 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context String mutation_id = entry.file_name; if (txn) txn->addMutation(shared_from_this(), mutation_id); + + bool alter_conversions_mutations_updated = updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false); bool inserted = current_mutations_by_version.try_emplace(version, std::move(entry)).second; if (!inserted) + { + if (alter_conversions_mutations_updated) + { + --alter_conversions_mutations; + chassert(alter_conversions_mutations >= 0); + } throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version); + } LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); } @@ -559,6 +568,8 @@ void StorageMergeTree::updateMutationEntriesErrors(FutureMergedMutatedPartPtr re entry.latest_fail_reason.clear(); if (static_cast(result_part->part_info.mutation) == it->first) mutation_backoff_policy.removePartFromFailed(failed_part->name); + + updateAlterConversionsMutations(it->second.commands, alter_conversions_mutations, /* remove= */ true); } } else @@ -837,8 +848,20 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) auto it = current_mutations_by_version.find(mutation_version); if (it != current_mutations_by_version.end()) { + bool mutation_finished = true; + if (std::optional min_version = getMinPartDataVersion()) + mutation_finished = *min_version > static_cast(mutation_version); + to_kill.emplace(std::move(it->second)); - current_mutations_by_version.erase(it); + + if (!mutation_finished) + { + const auto commands = it->second.commands; + current_mutations_by_version.erase(it); + updateAlterConversionsMutations(commands, alter_conversions_mutations, /* remove= */ true); + } + else + current_mutations_by_version.erase(it); } } @@ -916,6 +939,7 @@ void StorageMergeTree::loadMutations() auto inserted = current_mutations_by_version.try_emplace(block_number, std::move(entry)).second; if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", block_number); + updateAlterConversionsMutations(entry.commands, alter_conversions_mutations, /* remove= */ false); } else if (startsWith(it->name(), "tmp_mutation_")) { @@ -2159,10 +2183,6 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, ContextPtr local_context) { - auto lock1 = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); - auto lock2 = dest_table->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); - auto merges_blocker = stopMergesAndWait(); - auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) throw Exception(ErrorCodes::NOT_IMPLEMENTED, @@ -2176,6 +2196,13 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const this->getStoragePolicy()->getName(), dest_table_storage->getStorageID().getNameForLogs(), dest_table_storage->getStoragePolicy()->getName()); + // Use the same back-pressure (delay/throw) logic as for INSERTs to be consistent and avoid possibility of exceeding part limits using MOVE PARTITION 
queries + dest_table_storage->delayInsertOrThrowIfNeeded(nullptr, local_context, true); + + auto lock1 = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); + auto lock2 = dest_table->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); + auto merges_blocker = stopMergesAndWait(); + auto dest_metadata_snapshot = dest_table->getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr(); Stopwatch watch; @@ -2409,6 +2436,13 @@ void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) MutationCommands StorageMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const { + /// NOTE: there is no need to check part metadata_version, since + /// ALTER_METADATA cannot be done asynchronously, like in + /// ReplicatedMergeTree. + chassert(alter_conversions_mutations >= 0); + if (alter_conversions_mutations == 0) + return {}; + std::lock_guard lock(currently_processing_in_background_mutex); UInt64 part_data_version = part->info.getDataVersion(); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index d6e97c55604..4d819508934 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -147,6 +147,8 @@ private: DataParts currently_merging_mutating_parts; std::map current_mutations_by_version; + /// Unfinished mutations that is required AlterConversions (see getAlterMutationCommandsForPart()) + std::atomic alter_conversions_mutations = 0; std::atomic shutdown_called {false}; std::atomic flush_called {false}; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c425035dfba..837ff68417f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8244,10 +8244,6 @@ void StorageReplicatedMergeTree::replacePartitionFrom( void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, ContextPtr query_context) { - auto lock1 = lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout); - auto lock2 = dest_table->lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout); - auto storage_settings_ptr = getSettings(); - auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) throw Exception(ErrorCodes::NOT_IMPLEMENTED, @@ -8261,6 +8257,13 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta this->getStoragePolicy()->getName(), getStorageID().getNameForLogs(), dest_table_storage->getStoragePolicy()->getName()); + // Use the same back-pressure (delay/throw) logic as for INSERTs to be consistent and avoid possibility of exceeding part limits using MOVE PARTITION queries + dest_table_storage->delayInsertOrThrowIfNeeded(nullptr, query_context, true); + + auto lock1 = lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout); + auto lock2 = dest_table->lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout); + auto storage_settings_ptr = getSettings(); + auto dest_metadata_snapshot = dest_table->getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr(); diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 17375f31e06..f97d5a658ad 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1261,18 +1261,6 @@ def 
_print_results(result: Any, outfile: Optional[str], pretty: bool = False) -> raise AssertionError(f"Unexpected type for 'res': {type(result)}") -def _update_config_for_docs_only(jobs_data: dict) -> None: - DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] - print(f"NOTE: Will keep only docs related jobs: [{DOCS_CHECK_JOBS}]") - jobs_to_do = jobs_data.get("jobs_to_do", []) - jobs_data["jobs_to_do"] = [job for job in jobs_to_do if job in DOCS_CHECK_JOBS] - jobs_data["jobs_to_wait"] = { - job: params - for job, params in jobs_data["jobs_to_wait"].items() - if job in DOCS_CHECK_JOBS - } - - def _configure_docker_jobs(docker_digest_or_latest: bool) -> Dict: print("::group::Docker images check") # generate docker jobs data @@ -1332,8 +1320,31 @@ def _configure_jobs( jobs_to_skip: List[str] = [] digests: Dict[str, str] = {} + # FIXME: find better place for these config variables + DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] + MQ_JOBS = [JobNames.STYLE_CHECK, JobNames.FAST_TEST] + # Must always calculate digest for these jobs for CI Cache to function (they define s3 paths where records are stored) + REQUIRED_DIGESTS = [JobNames.DOCS_CHECK, Build.PACKAGE_RELEASE] + if pr_info.has_changes_in_documentation_only(): + print(f"WARNING: Only docs are changed - will run only [{DOCS_CHECK_JOBS}]") + if pr_info.is_merge_queue: + print(f"WARNING: It's a MQ run - will run only [{MQ_JOBS}]") + print("::group::Job Digests") for job in CI_CONFIG.job_generator(pr_info.head_ref if CI else "dummy_branch_name"): + if ( + pr_info.is_merge_queue + and job not in MQ_JOBS + and job not in REQUIRED_DIGESTS + ): + # We still need digest for JobNames.DOCS_CHECK since CiCache depends on it (FIXME) + continue + if ( + pr_info.has_changes_in_documentation_only() + and job not in DOCS_CHECK_JOBS + and job not in REQUIRED_DIGESTS + ): + continue digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job)) digests[job] = digest print(f" job [{job.rjust(50)}] has digest [{digest}]") @@ -1354,6 +1365,8 @@ def _configure_jobs( batches_to_do: List[int] = [] add_to_skip = False + if pr_info.is_merge_queue and job not in MQ_JOBS: + continue if job_config.pr_only and pr_info.is_release_branch: continue if job_config.release_only and not pr_info.is_release_branch: @@ -1435,12 +1448,6 @@ def _configure_jobs( jobs_to_do, jobs_to_skip, jobs_params ) - if pr_info.is_merge_queue: - # FIXME: Quick support for MQ workflow which is only StyleCheck for now - jobs_to_do = [JobNames.STYLE_CHECK] - jobs_to_skip = [] - print(f"NOTE: This is Merge Queue CI: set jobs to do: [{jobs_to_do}]") - return { "digests": digests, "jobs_to_do": jobs_to_do, @@ -1904,9 +1911,6 @@ def main() -> int: else {} ) - if not args.skip_jobs and pr_info.has_changes_in_documentation_only(): - _update_config_for_docs_only(jobs_data) - if not args.skip_jobs: ci_cache = CiCache(s3, jobs_data["digests"]) @@ -1930,8 +1934,7 @@ def main() -> int: jobs_to_skip.append(job) del jobs_params[job] - # set planned jobs as pending in the CI cache if on the master - if pr_info.is_master: + # set planned jobs as in-progress in CI cache for job in jobs_data["jobs_to_do"]: config = CI_CONFIG.get_job_config(job) if config.run_always or config.run_by_label: diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b23d3d511fd..bdfff12db0b 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -49,7 +49,7 @@ class CILabels(metaclass=WithIter): CI_SET_FAST = "ci_set_fast" CI_SET_ARM = "ci_set_arm" CI_SET_INTEGRATION = "ci_set_integration" 
- CI_SET_ANALYZER = "ci_set_analyzer" + CI_SET_OLD_ANALYZER = "ci_set_old_analyzer" CI_SET_STATLESS = "ci_set_stateless" CI_SET_STATEFUL = "ci_set_stateful" CI_SET_STATLESS_ASAN = "ci_set_stateless_asan" @@ -98,15 +98,16 @@ class JobNames(metaclass=WithIter): STATELESS_TEST_TSAN = "Stateless tests (tsan)" STATELESS_TEST_MSAN = "Stateless tests (msan)" STATELESS_TEST_UBSAN = "Stateless tests (ubsan)" - STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE = ( + STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE = ( "Stateless tests (release, old analyzer, s3, DatabaseReplicated)" ) - # merged into STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE: - # STATELESS_TEST_ANALYZER_RELEASE = "Stateless tests (release, analyzer)" + # merged into STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: + # STATELESS_TEST_OLD_ANALYZER_RELEASE = "Stateless tests (release, analyzer)" # STATELESS_TEST_DB_REPL_RELEASE = "Stateless tests (release, DatabaseReplicated)" # STATELESS_TEST_S3_RELEASE = "Stateless tests (release, s3 storage)" STATELESS_TEST_S3_DEBUG = "Stateless tests (debug, s3 storage)" STATELESS_TEST_S3_TSAN = "Stateless tests (tsan, s3 storage)" + STATELESS_TEST_AZURE_ASAN = "Stateless tests (azure, asan)" STATELESS_TEST_FLAKY_ASAN = "Stateless tests flaky check (asan)" STATEFUL_TEST_DEBUG = "Stateful tests (debug)" @@ -129,10 +130,11 @@ class JobNames(metaclass=WithIter): STRESS_TEST_UBSAN = "Stress test (ubsan)" STRESS_TEST_MSAN = "Stress test (msan)" STRESS_TEST_DEBUG = "Stress test (debug)" + STRESS_TEST_AZURE_TSAN = "Stress test (azure, tsan)" INTEGRATION_TEST = "Integration tests (release)" INTEGRATION_TEST_ASAN = "Integration tests (asan)" - INTEGRATION_TEST_ASAN_ANALYZER = "Integration tests (asan, old analyzer)" + INTEGRATION_TEST_ASAN_OLD_ANALYZER = "Integration tests (asan, old analyzer)" INTEGRATION_TEST_TSAN = "Integration tests (tsan)" INTEGRATION_TEST_ARM = "Integration tests (aarch64)" INTEGRATION_TEST_FLAKY = "Integration tests flaky check (asan)" @@ -846,14 +848,14 @@ CI_CONFIG = CIConfig( JobNames.INTEGRATION_TEST, ] ), - CILabels.CI_SET_ANALYZER: LabelConfig( + CILabels.CI_SET_OLD_ANALYZER: LabelConfig( run_jobs=[ JobNames.STYLE_CHECK, JobNames.FAST_TEST, Build.PACKAGE_RELEASE, Build.PACKAGE_ASAN, - JobNames.STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE, - JobNames.INTEGRATION_TEST_ASAN_ANALYZER, + JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, + JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, ] ), CILabels.CI_SET_STATLESS: LabelConfig( @@ -1193,7 +1195,7 @@ CI_CONFIG = CIConfig( JobNames.STATELESS_TEST_AARCH64: TestConfig( Build.PACKAGE_AARCH64, job_config=JobConfig(**statless_test_common_params) # type: ignore ), - JobNames.STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE: TestConfig( + JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: TestConfig( Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore ), @@ -1201,6 +1203,10 @@ CI_CONFIG = CIConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore ), + JobNames.STATELESS_TEST_AZURE_ASAN: TestConfig( + Build.PACKAGE_ASAN, + job_config=JobConfig(num_batches=4, **statless_test_common_params, release_only=True), # type: ignore + ), JobNames.STATELESS_TEST_S3_TSAN: TestConfig( Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore @@ -1223,6 +1229,9 @@ CI_CONFIG = CIConfig( JobNames.UPGRADE_TEST_ASAN: TestConfig( Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, 
random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), + JobNames.STRESS_TEST_AZURE_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params, release_only=True) # type: ignore + ), JobNames.UPGRADE_TEST_TSAN: TestConfig( Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), @@ -1236,7 +1245,7 @@ CI_CONFIG = CIConfig( Build.PACKAGE_ASAN, job_config=JobConfig(num_batches=4, **integration_test_common_params, release_only=True), # type: ignore ), - JobNames.INTEGRATION_TEST_ASAN_ANALYZER: TestConfig( + JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: TestConfig( Build.PACKAGE_ASAN, job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore ), @@ -1368,8 +1377,8 @@ REQUIRED_CHECKS = [ JobNames.UNIT_TEST, JobNames.UNIT_TEST_TSAN, JobNames.UNIT_TEST_UBSAN, - JobNames.INTEGRATION_TEST_ASAN_ANALYZER, - JobNames.STATELESS_TEST_ANALYZER_S3_REPLICATED_RELEASE, + JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, + JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE, ] diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 5561ddf2064..c678b195316 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -443,8 +443,7 @@ def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> or pr_info.number == 0 ) - # FIXME: For now, always set mergeable check in the Merge Queue. It's required to pass MQ - if not_run and not pr_info.is_merge_queue: + if not_run: # Let's avoid unnecessary work return diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 6902e526da9..a66ebbeadf4 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import logging +import sys from github import Github @@ -8,6 +9,7 @@ from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, + set_mergeable_check, trigger_mergeable_check, ) from get_robot_token import get_best_robot_token @@ -18,14 +20,28 @@ from report import PENDING, SUCCESS def main(): logging.basicConfig(level=logging.INFO) + has_failure = False + + # FIXME: temporary hack to fail Mergeable Check in MQ if pipeline has any failed jobs + if len(sys.argv) > 1 and sys.argv[1] == "--pipeline-failure": + has_failure = True + pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - # Unconditionally update the Mergeable Check at the final step - statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check(commit, statuses) + statuses = None + + if pr_info.is_merge_queue: + # in MQ Mergeable check status must never be green if any failures in workflow + if has_failure: + set_mergeable_check(commit, "workflow failed", "failure") + else: + # This must be the only place where green MCheck is set in the MQ (in the end of CI) to avoid early merge + set_mergeable_check(commit, "workflow passed", "success") + else: + statuses = get_commit_filtered_statuses(commit) + trigger_mergeable_check(commit, statuses) - if not pr_info.is_merge_queue: statuses = [s for s in statuses if s.context == StatusNames.CI] if not statuses: return diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index a0c977ea25f..6418ee4e0ce 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -15,6 +15,7 @@ from 
clickhouse_helper import CiLogsCredentials from docker_images_helper import DockerImage, get_docker_image, pull_image from download_release_packages import download_last_release from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH +from get_robot_token import get_parameter_from_ssm from pr_info import PRInfo from report import ERROR, SUCCESS, JobReport, StatusType, TestResults, read_test_results from stopwatch import Stopwatch @@ -27,6 +28,8 @@ def get_additional_envs( check_name: str, run_by_hash_num: int, run_by_hash_total: int ) -> List[str]: result = [] + azure_connection_string = get_parameter_from_ssm("azure_connection_string") + result.append(f"AZURE_CONNECTION_STRING='{azure_connection_string}'") if "DatabaseReplicated" in check_name: result.append("USE_DATABASE_REPLICATED=1") if "DatabaseOrdinary" in check_name: @@ -40,6 +43,9 @@ def get_additional_envs( result.append("RANDOMIZE_OBJECT_KEY_TYPE=1") if "analyzer" in check_name: result.append("USE_OLD_ANALYZER=1") + if "azure" in check_name: + assert "USE_S3_STORAGE_FOR_MERGE_TREE=1" not in result + result.append("USE_AZURE_STORAGE_FOR_MERGE_TREE=1") if run_by_hash_total != 0: result.append(f"RUN_BY_HASH_NUM={run_by_hash_num}") diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 118a790590a..4abaeac30b7 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -265,7 +265,9 @@ class ClickhouseIntegrationTestsRunner: self.start_time = time.time() self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) - self.use_analyzer = os.environ.get("CLICKHOUSE_USE_OLD_ANALYZER") is not None + self.use_old_analyzer = ( + os.environ.get("CLICKHOUSE_USE_OLD_ANALYZER") is not None + ) if "run_by_hash_total" in self.params: self.run_by_hash_total = self.params["run_by_hash_total"] @@ -414,8 +416,8 @@ class ClickhouseIntegrationTestsRunner: result.append("--tmpfs") if self.disable_net_host: result.append("--disable-net-host") - if self.use_analyzer: - result.append("--analyzer") + if self.use_old_analyzer: + result.append("--old-analyzer") return " ".join(result) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index afe4e2b87b0..975e560437a 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -318,7 +318,7 @@ class PRInfo: @property def is_release_branch(self) -> bool: - return self.number == 0 + return self.number == 0 and not self.is_merge_queue @property def is_pr(self): diff --git a/tests/ci/stress.py b/tests/ci/stress.py index b1f5a28ec9e..a9f5916464d 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -19,7 +19,6 @@ def get_options(i: int, upgrade_check: bool) -> str: if i % 3 == 2 and not upgrade_check: options.append(f'''--db-engine="Replicated('/test/db/test_{i}', 's1', 'r1')"''') - client_options.append("allow_experimental_database_replicated=1") client_options.append("enable_deflate_qpl_codec=1") client_options.append("enable_zstd_qat_codec=1") diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 0f2c0cdd222..027d7316e23 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -12,18 +12,23 @@ from build_download_helper import download_all_deb_packages from clickhouse_helper import CiLogsCredentials from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH +from get_robot_token import get_parameter_from_ssm from pr_info import PRInfo from report import ERROR, JobReport, TestResult, TestResults, 
read_test_results from stopwatch import Stopwatch from tee_popen import TeePopen -def get_additional_envs() -> List[str]: +def get_additional_envs(check_name: str) -> List[str]: result = [] + azure_connection_string = get_parameter_from_ssm("azure_connection_string") + result.append(f"AZURE_CONNECTION_STRING='{azure_connection_string}'") # some cloud-specific features require feature flags enabled # so we need this ENV to be able to disable the randomization # of feature flags result.append("RANDOMIZE_KEEPER_FEATURE_FLAGS=1") + if "azure" in check_name: + result.append("USE_AZURE_STORAGE_FOR_MERGE_TREE=1") return result @@ -143,7 +148,7 @@ def run_stress_test(docker_image_name: str) -> None: pr_info, stopwatch.start_time_str, check_name ) - additional_envs = get_additional_envs() + additional_envs = get_additional_envs(check_name) run_command = get_run_command( packages_path, diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 7c87144a84d..f336b917644 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -4,7 +4,6 @@ import unittest from ci import CiOptions -from ci_config import JobNames _TEST_BODY_1 = """ #### Run only: @@ -44,6 +43,85 @@ _TEST_BODY_3 = """ - [x] Must include all tests for analyzer """ +_TEST_JOB_LIST = [ + "Style check", + "Fast test", + "package_release", + "package_asan", + "Docker server image", + "Docker keeper image", + "Install packages (amd64)", + "Install packages (arm64)", + "Stateless tests (debug)", + "Stateless tests (release)", + "Stateless tests (coverage)", + "Stateless tests (aarch64)", + "Stateless tests (asan)", + "Stateless tests (tsan)", + "Stateless tests (msan)", + "Stateless tests (ubsan)", + "Stateless tests (release, old analyzer, s3, DatabaseReplicated)", + "Stateless tests (debug, s3 storage)", + "Stateless tests (tsan, s3 storage)", + "Stateless tests flaky check (asan)", + "Stateful tests (debug)", + "Stateful tests (release)", + "Stateful tests (coverage)", + "Stateful tests (aarch64)", + "Stateful tests (asan)", + "Stateful tests (tsan)", + "Stateful tests (msan)", + "Stateful tests (ubsan)", + "Stateful tests (release, ParallelReplicas)", + "Stateful tests (debug, ParallelReplicas)", + "Stateful tests (asan, ParallelReplicas)", + "Stateful tests (msan, ParallelReplicas)", + "Stateful tests (ubsan, ParallelReplicas)", + "Stateful tests (tsan, ParallelReplicas)", + "Stress test (asan)", + "Stress test (tsan)", + "Stress test (ubsan)", + "Stress test (msan)", + "Stress test (debug)", + "Integration tests (release)", + "Integration tests (asan)", + "Integration tests (asan, old analyzer)", + "Integration tests (tsan)", + "Integration tests (aarch64)", + "Integration tests flaky check (asan)", + "Upgrade check (debug)", + "Upgrade check (asan)", + "Upgrade check (tsan)", + "Upgrade check (msan)", + "Unit tests (release)", + "Unit tests (asan)", + "Unit tests (msan)", + "Unit tests (tsan)", + "Unit tests (ubsan)", + "AST fuzzer (debug)", + "AST fuzzer (asan)", + "AST fuzzer (msan)", + "AST fuzzer (tsan)", + "AST fuzzer (ubsan)", + "ClickHouse Keeper Jepsen", + "ClickHouse Server Jepsen", + "Performance Comparison", + "Performance Comparison Aarch64", + "Sqllogic test (release)", + "SQLancer (release)", + "SQLancer (debug)", + "SQLTest", + "Compatibility check (amd64)", + "Compatibility check (aarch64)", + "ClickBench (amd64)", + "ClickBench (aarch64)", + "libFuzzer tests", + "ClickHouse build check", + "ClickHouse special build check", + "Docs check", + "Bugfix validation", +] + class 
TestCIOptions(unittest.TestCase): def test_pr_body_parsing(self): @@ -69,7 +147,7 @@ class TestCIOptions(unittest.TestCase): ci_options.exclude_keywords, ["tsan", "aarch64", "analyzer", "s3_storage", "coverage"], ) - jobs_to_do = list(JobNames) + jobs_to_do = list(_TEST_JOB_LIST) jobs_to_skip = [] job_params = {} jobs_to_do, jobs_to_skip, job_params = ci_options.apply( @@ -81,9 +159,6 @@ class TestCIOptions(unittest.TestCase): "Style check", "package_release", "package_asan", - "package_ubsan", - "package_debug", - "package_msan", "Stateless tests (asan)", "Stateless tests flaky check (asan)", "Stateless tests (msan)", @@ -103,7 +178,7 @@ class TestCIOptions(unittest.TestCase): ) self.assertCountEqual(ci_options.include_keywords, ["analyzer"]) self.assertIsNone(ci_options.exclude_keywords) - jobs_to_do = list(JobNames) + jobs_to_do = list(_TEST_JOB_LIST) jobs_to_skip = [] job_params = {} jobs_to_do, jobs_to_skip, job_params = ci_options.apply( diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 74137b9dc32..e31b9a0bdc7 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1646,8 +1646,8 @@ class TestCase: client_options = self.add_random_settings(client_options) - if not is_valid_utf_8(self.case_file) or not is_valid_utf_8( - self.reference_file + if not is_valid_utf_8(self.case_file) or ( + self.reference_file and not is_valid_utf_8(self.reference_file) ): proc, stdout, stderr, debug_log, total_time = self.run_single_test( server_logs_level, client_options diff --git a/tests/config/config.d/azure_storage_conf.xml b/tests/config/config.d/azure_storage_conf.xml index 412d40111a7..f24b62b87b1 100644 --- a/tests/config/config.d/azure_storage_conf.xml +++ b/tests/config/config.d/azure_storage_conf.xml @@ -4,13 +4,10 @@ object_storage azure - http://localhost:10000/devstoreaccount1 - cont false - - devstoreaccount1 - Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== 33554432 + openbucketforpublicci + cache diff --git a/tests/config/config.d/azure_storage_policy_by_default.xml b/tests/config/config.d/azure_storage_policy_by_default.xml new file mode 100644 index 00000000000..cab8a106f1b --- /dev/null +++ b/tests/config/config.d/azure_storage_policy_by_default.xml @@ -0,0 +1,5 @@ + + + azure_cache + + diff --git a/tests/config/install.sh b/tests/config/install.sh index 7a472f0eab7..33dcac9d2c7 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -156,7 +156,7 @@ if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; th ln -sf $SRC_PATH/users.d/database_ordinary.xml $DEST_SERVER_PATH/users.d/ fi -if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then +if [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" == "1" ]]; then object_key_types_options=("generate-suffix" "generate-full-key" "generate-template-key") object_key_type="${object_key_types_options[0]}" @@ -177,6 +177,8 @@ if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TR ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ ;; esac +elif [[ "$USE_AZURE_STORAGE_FOR_MERGE_TREE" == "1" ]]; then + ln -sf $SRC_PATH/config.d/azure_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ fi ARM="aarch64" diff --git a/tests/config/users.d/database_replicated.xml b/tests/config/users.d/database_replicated.xml index 2b96e7418b6..c049c3559fc 100644 --- a/tests/config/users.d/database_replicated.xml +++ b/tests/config/users.d/database_replicated.xml @@ -1,7 
+1,6 @@ - 1 none 120 120 diff --git a/tests/integration/helpers/0_common_enable_analyzer.xml b/tests/integration/helpers/0_common_enable_old_analyzer.xml similarity index 100% rename from tests/integration/helpers/0_common_enable_analyzer.xml rename to tests/integration/helpers/0_common_enable_old_analyzer.xml diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 1f29dfe8eee..693e41253cb 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1601,7 +1601,7 @@ class ClickHouseCluster: with_jdbc_bridge=False, with_hive=False, with_coredns=False, - use_old_analyzer=False, + use_old_analyzer=None, hostname=None, env_variables=None, instance_env_variables=False, @@ -4405,8 +4405,18 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) - if self.use_old_analyzer: - write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) + + use_old_analyzer = os.environ.get("CLICKHOUSE_USE_OLD_ANALYZER") is not None + # If specific version was used there can be no + # allow_experimental_analyzer setting, so do this only if it was + # explicitly requested. + if self.tag: + use_old_analyzer = False + # Prefer specified in the test option: + if self.use_old_analyzer is not None: + use_old_analyzer = self.use_old_analyzer + if use_old_analyzer: + write_embedded_config("0_common_enable_old_analyzer.xml", users_d_dir) if len(self.custom_dictionaries_paths): write_embedded_config("0_common_enable_dictionaries.xml", self.config_d_dir) diff --git a/tests/integration/runner b/tests/integration/runner index 97d06c2b78c..a583d7fe897 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -285,11 +285,11 @@ if __name__ == "__main__": ) parser.add_argument( - "--analyzer", + "--old-analyzer", action="store_true", default=False, - dest="analyzer", - help="Use new analyzer infrastructure", + dest="old_analyzer", + help="Use old analyzer infrastructure", ) parser.add_argument( @@ -385,9 +385,9 @@ if __name__ == "__main__": if args.keyword_expression: args.pytest_args += ["-k", args.keyword_expression] - use_analyzer = "" - if args.analyzer: - use_analyzer = "-e CLICKHOUSE_USE_OLD_ANALYZER=1" + use_old_analyzer = "" + if args.old_analyzer: + use_old_analyzer = "-e CLICKHOUSE_USE_OLD_ANALYZER=1" # NOTE: since pytest options is in the argument value already we need to additionally escape '"' pytest_opts = " ".join( @@ -408,7 +408,7 @@ if __name__ == "__main__": f"--volume={args.utils_dir}/backupview:/ClickHouse/utils/backupview " f"--volume={args.utils_dir}/grpc-client/pb2:/ClickHouse/utils/grpc-client/pb2 " f"--volume=/run:/run/host:ro {dockerd_internal_volume} {env_tags} {env_cleanup} " - f"-e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 {use_analyzer} -e PYTHONUNBUFFERED=1 " + f"-e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 {use_old_analyzer} -e PYTHONUNBUFFERED=1 " f'-e PYTEST_ADDOPTS="{parallel_args} {pytest_opts} {tests_list} {rand_args} -vvv"' f" {DIND_INTEGRATION_TESTS_IMAGE_NAME}:{args.docker_image_version}" ) diff --git a/tests/integration/test_attach_table_from_s3_plain_readonly/configs/settings.xml b/tests/integration/test_attach_table_from_s3_plain_readonly/configs/settings.xml index 3e6d615557d..16caee9ba20 100644 --- a/tests/integration/test_attach_table_from_s3_plain_readonly/configs/settings.xml +++ b/tests/integration/test_attach_table_from_s3_plain_readonly/configs/settings.xml @@ -1,7 +1,6 @@ - 1 diff --git 
a/tests/integration/test_backup_restore_on_cluster/configs/allow_database_types.xml b/tests/integration/test_backup_restore_on_cluster/configs/allow_database_types.xml index e0e026210b1..8ffd2f27a62 100644 --- a/tests/integration/test_backup_restore_on_cluster/configs/allow_database_types.xml +++ b/tests/integration/test_backup_restore_on_cluster/configs/allow_database_types.xml @@ -1,7 +1,6 @@ - 1 1 diff --git a/tests/integration/test_dictionaries_replace/configs/allow_database_types.xml b/tests/integration/test_dictionaries_replace/configs/allow_database_types.xml index 0434df06457..db0dd71de56 100644 --- a/tests/integration/test_dictionaries_replace/configs/allow_database_types.xml +++ b/tests/integration/test_dictionaries_replace/configs/allow_database_types.xml @@ -1,7 +1,6 @@ - 1 diff --git a/tests/integration/test_disabled_access_control_improvements/configs/config.d/disable_access_control_improvements.xml b/tests/integration/test_disabled_access_control_improvements/configs/config.d/disable_access_control_improvements.xml index a335c7f8a1f..cd2025b8a86 100644 --- a/tests/integration/test_disabled_access_control_improvements/configs/config.d/disable_access_control_improvements.xml +++ b/tests/integration/test_disabled_access_control_improvements/configs/config.d/disable_access_control_improvements.xml @@ -1,8 +1,8 @@ - - - - + false + false + false + false diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index fba51949ef0..8ddc1ff3c31 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -295,7 +295,6 @@ def test_replicated_database(cluster): node1 = cluster.instances["node3"] node1.query( "CREATE DATABASE rdb ENGINE=Replicated('/test/rdb', 's1', 'r1')", - settings={"allow_experimental_database_replicated": 1}, ) global uuids @@ -312,7 +311,6 @@ def test_replicated_database(cluster): node2 = cluster.instances["node2"] node2.query( "CREATE DATABASE rdb ENGINE=Replicated('/test/rdb', 's1', 'r2')", - settings={"allow_experimental_database_replicated": 1}, ) node2.query("SYSTEM SYNC DATABASE REPLICA rdb") diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 88f051b022b..10dbb23d961 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -7,7 +7,7 @@ import uuid import time from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, CLICKHOUSE_CI_MIN_TESTED_VERSION cluster = ClickHouseCluster(__file__) @@ -23,9 +23,20 @@ def make_instance(name, cfg, *args, **kwargs): ) +# DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 added in 23.3, ensure that CLICKHOUSE_CI_MIN_TESTED_VERSION fits +assert CLICKHOUSE_CI_MIN_TESTED_VERSION < "23.3" + # _n1/_n2 contains cluster with different -- should fail n1 = make_instance("n1", "configs/remote_servers_n1.xml") n2 = make_instance("n2", "configs/remote_servers_n2.xml") +backward = make_instance( + "backward", + "configs/remote_servers_backward.xml", + image="clickhouse/clickhouse-server", + # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 + tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, + with_installed_binary=True, +) users = pytest.mark.parametrize( "user,password", @@ -399,3 +410,25 @@ def test_per_user_protocol_settings_secure_cluster(user, 
password): assert int(get_query_setting_on_shard(n1, id_, "max_memory_usage_for_user")) == int( 1e9 ) + + +@users +def test_user_secure_cluster_with_backward(user, password): + id_ = "with-backward-query-dist_secure-" + user + n1.query( + f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password + ) + assert get_query_user_info(n1, id_) == [user, user] + assert get_query_user_info(backward, id_) == [user, user] + + +@users +def test_user_secure_cluster_from_backward(user, password): + id_ = "from-backward-query-dist_secure-" + user + backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) + assert get_query_user_info(n1, id_) == [user, user] + assert get_query_user_info(backward, id_) == [user, user] + + assert n1.contains_in_log( + "Using deprecated interserver protocol because the client is too old. Consider upgrading all nodes in cluster." + ) diff --git a/tests/integration/test_drop_if_empty/configs/drop_if_empty_check.xml b/tests/integration/test_drop_if_empty/configs/drop_if_empty_check.xml index 34a78ac9919..dbad8899646 100644 --- a/tests/integration/test_drop_if_empty/configs/drop_if_empty_check.xml +++ b/tests/integration/test_drop_if_empty/configs/drop_if_empty_check.xml @@ -3,7 +3,6 @@ 9000 - 1 diff --git a/tests/integration/test_drop_if_empty/test.py b/tests/integration/test_drop_if_empty/test.py index d96936eb826..251ed302b38 100644 --- a/tests/integration/test_drop_if_empty/test.py +++ b/tests/integration/test_drop_if_empty/test.py @@ -37,18 +37,13 @@ def start_cluster(): def test_drop_if_empty(start_cluster): - settings = { - "allow_experimental_database_replicated": 1, - } node1.query( "CREATE DATABASE replicateddb " "ENGINE = Replicated('/clickhouse/databases/replicateddb', 'shard1', 'node1')", - settings=settings, ) node2.query( "CREATE DATABASE replicateddb " "ENGINE = Replicated('/clickhouse/databases/replicateddb', 'shard1', 'node2')", - settings=settings, ) node1.query( "CREATE TABLE default.tbl ON CLUSTER 'cluster' (" diff --git a/tests/integration/test_external_cluster/test.py b/tests/integration/test_external_cluster/test.py index 6fa9cd16dbc..306ecf66bc7 100644 --- a/tests/integration/test_external_cluster/test.py +++ b/tests/integration/test_external_cluster/test.py @@ -66,7 +66,6 @@ def test_ddl(started_cluster): def test_ddl_replicated(started_cluster): control_node.query( "CREATE DATABASE test_db ON CLUSTER 'external' ENGINE=Replicated('/replicated')", - settings={"allow_experimental_database_replicated": 1}, ) # Exception is expected assert "It's not initial query" in control_node.query_and_get_error( diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index fc115e5a21f..851da99acf3 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -27,6 +27,8 @@ if is_arm(): # Utilities +IPV6_ADDRESS = "2001:3984:3989::1:1111" + config_dir = os.path.join(script_dir, "./configs") cluster = ClickHouseCluster(__file__) node = cluster.add_instance( @@ -36,12 +38,15 @@ node = cluster.add_instance( env_variables={ "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") }, + ipv6_address=IPV6_ADDRESS, ) main_channel = None -def create_channel(): - node_ip_with_grpc_port = cluster.get_instance_ip("node") + ":" + str(GRPC_PORT) +def create_channel(hostname=None): + if not hostname: + hostname = cluster.get_instance_ip("node") + node_ip_with_grpc_port = hostname + ":" + str(GRPC_PORT) channel = 
grpc.insecure_channel(node_ip_with_grpc_port) grpc.channel_ready_future(channel).result(timeout=10) global main_channel @@ -204,6 +209,11 @@ def test_select_one(): assert query("SELECT 1") == "1\n" +def test_ipv6_select_one(): + with create_channel(f"[{IPV6_ADDRESS}]") as channel: + assert query("SELECT 1", channel=channel) == "1\n" + + def test_ordinary_query(): assert query("SELECT count() FROM numbers(100)") == "100\n" diff --git a/tests/integration/test_replicated_database/configs/settings.xml b/tests/integration/test_replicated_database/configs/settings.xml index 7c0e60a044e..c637fe8eead 100644 --- a/tests/integration/test_replicated_database/configs/settings.xml +++ b/tests/integration/test_replicated_database/configs/settings.xml @@ -2,7 +2,6 @@ 1 - 1 1 0 0 diff --git a/tests/integration/test_replicated_database_cluster_groups/configs/settings.xml b/tests/integration/test_replicated_database_cluster_groups/configs/settings.xml index 5666ffeace8..872a6cd0b6c 100644 --- a/tests/integration/test_replicated_database_cluster_groups/configs/settings.xml +++ b/tests/integration/test_replicated_database_cluster_groups/configs/settings.xml @@ -2,7 +2,6 @@ 1 - 1 1 diff --git a/tests/performance/alter_select.xml b/tests/performance/alter_select.xml index fbbf603dcba..bac5a29afc5 100644 --- a/tests/performance/alter_select.xml +++ b/tests/performance/alter_select.xml @@ -32,4 +32,3 @@ drop table alter_select_{engine} - diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh index 8ebe1807a1b..91f7a276ea3 100755 --- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh +++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh @@ -15,7 +15,7 @@ function create_db() SUFFIX=$(($RANDOM % 16)) # Multiple database replicas on one server are actually not supported (until we have namespaces). # So CREATE TABLE queries will fail on all replicas except one. But it's still makes sense for a stress test. 
- $CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 --query \ + $CLICKHOUSE_CLIENT --query \ "create database if not exists ${CLICKHOUSE_DATABASE}_repl_01111_$SUFFIX engine=Replicated('/test/01111/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '$SHARD', '$REPLICA')" \ 2>&1| grep -Fa "Exception: " | grep -Fv "REPLICA_ALREADY_EXISTS" | grep -Fiv "Will not try to start it up" | \ grep -Fv "Coordination::Exception" | grep -Fv "already contains some data and it does not look like Replicated database path" diff --git a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql index fc3fcb34fc0..de244e64999 100644 --- a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql +++ b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql @@ -43,7 +43,6 @@ DROP DATABASE test_01148_atomic; DROP TABLE rmt; DROP TABLE rmt1; -SET allow_experimental_database_replicated=1; DROP DATABASE IF EXISTS imdb_01148; CREATE DATABASE imdb_01148 ENGINE = Replicated('/test/databases/imdb_01148', '{shard}', '{replica}'); CREATE TABLE imdb_01148.movie_directors (`director_id` UInt64, `movie_id` UInt64) ENGINE = ReplicatedMergeTree ORDER BY (director_id, movie_id) SETTINGS index_granularity = 8192; diff --git a/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.reference deleted file mode 100644 index 0a7746e1832..00000000000 --- a/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.reference +++ /dev/null @@ -1,162 +0,0 @@ -0 -1 -2 -0 -1 -2 -0 -1 -2 -2 -1 -0 -0 -1 -2 -0 -1 -2 -0 -1 -2 -2 -1 -0 -2 -1 -0 -2 -1 -0 -0 -1 -2 -2 -1 -0 -2 -1 -0 -SELECT number -FROM numbers(3) -ORDER BY number ASC -SELECT number -FROM numbers(3) -ORDER BY abs(toFloat32(number)) ASC -SELECT number -FROM numbers(3) -ORDER BY toFloat32(abs(number)) ASC -SELECT number -FROM numbers(3) -ORDER BY number DESC -SELECT number -FROM numbers(3) -ORDER BY exp(number) ASC -SELECT roundToExp2(number) AS x -FROM numbers(3) -ORDER BY number ASC -SELECT number AS x -FROM numbers(3) -ORDER BY number ASC -SELECT number -FROM numbers(3) -ORDER BY number DESC -SELECT number -FROM numbers(3) -ORDER BY abs(toFloat32(number)) DESC -SELECT number -FROM numbers(3) -ORDER BY toFloat32(abs(number)) DESC -SELECT number -FROM numbers(3) -ORDER BY number ASC -SELECT number -FROM numbers(3) -ORDER BY exp(number) DESC -SELECT roundToExp2(number) AS x -FROM numbers(3) -ORDER BY number DESC -0 -1 -2 -0 -1 -2 -0 -1 -2 -2 -1 -0 -0 -1 -2 -0 -1 -2 -0 -1 -2 -2 -1 -0 -2 -1 -0 -2 -1 -0 -0 -1 -2 -2 -1 -0 -2 -1 -0 -SELECT number -FROM numbers(3) -ORDER BY toFloat32(toFloat64(number)) ASC -SELECT number -FROM numbers(3) -ORDER BY abs(toFloat32(number)) ASC -SELECT number -FROM numbers(3) -ORDER BY toFloat32(abs(number)) ASC -SELECT number -FROM numbers(3) -ORDER BY -number ASC -SELECT number -FROM numbers(3) -ORDER BY exp(number) ASC -SELECT roundToExp2(number) AS x -FROM numbers(3) -ORDER BY - x ASC, - toFloat32(x) ASC -SELECT number AS x -FROM numbers(3) -ORDER BY - toFloat32(x) AS k ASC, - toFloat64(k) ASC -SELECT number -FROM numbers(3) -ORDER BY toFloat32(toFloat64(number)) DESC -SELECT number -FROM numbers(3) -ORDER BY abs(toFloat32(number)) DESC -SELECT number -FROM numbers(3) -ORDER BY toFloat32(abs(number)) DESC -SELECT number -FROM numbers(3) -ORDER BY -number DESC -SELECT number -FROM numbers(3) -ORDER BY exp(number) DESC -SELECT roundToExp2(number) AS x -FROM numbers(3) 
-ORDER BY - x DESC, - toFloat32(x) DESC diff --git a/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.sql b/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.sql deleted file mode 100644 index 516c3d26a75..00000000000 --- a/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.sql +++ /dev/null @@ -1,58 +0,0 @@ -SET optimize_monotonous_functions_in_order_by = 1; - -SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)); -SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)); -SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)); -SELECT number FROM numbers(3) ORDER BY -number; -SELECT number FROM numbers(3) ORDER BY exp(number); -SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x, toFloat32(x); -SELECT number AS x FROM numbers(3) ORDER BY toFloat32(x) as k, toFloat64(k); -SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)) DESC; -SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)) DESC; -SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)) DESC; -SELECT number FROM numbers(3) ORDER BY -number DESC; -SELECT number FROM numbers(3) ORDER BY exp(number) DESC; -SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x DESC, toFloat32(x) DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)); -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)); -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)); -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY -number; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY exp(number); -EXPLAIN SYNTAX SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x, toFloat32(x); -EXPLAIN SYNTAX SELECT number AS x FROM numbers(3) ORDER BY toFloat32(x) as k, toFloat64(k); -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)) DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)) DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)) DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY -number DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY exp(number) DESC; -EXPLAIN SYNTAX SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x DESC, toFloat32(x) DESC; - -SET optimize_monotonous_functions_in_order_by = 0; - -SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)); -SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)); -SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)); -SELECT number FROM numbers(3) ORDER BY -number; -SELECT number FROM numbers(3) ORDER BY exp(number); -SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x, toFloat32(x); -SELECT number AS x FROM numbers(3) ORDER BY toFloat32(x) as k, toFloat64(k); -SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)) DESC; -SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)) DESC; -SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)) DESC; -SELECT number FROM numbers(3) ORDER BY -number DESC; -SELECT number FROM numbers(3) ORDER BY exp(number) DESC; -SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x DESC, toFloat32(x) DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)); -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)); -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)); -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY 
-number; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY exp(number); -EXPLAIN SYNTAX SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x, toFloat32(x); -EXPLAIN SYNTAX SELECT number AS x FROM numbers(3) ORDER BY toFloat32(x) as k, toFloat64(k); -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)) DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)) DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)) DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY -number DESC; -EXPLAIN SYNTAX SELECT number FROM numbers(3) ORDER BY exp(number) DESC; -EXPLAIN SYNTAX SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x DESC, toFloat32(x) DESC; --- TODO: exp() should be monotonous function diff --git a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference deleted file mode 100644 index 186e6565ffe..00000000000 --- a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference +++ /dev/null @@ -1,32 +0,0 @@ -1 4 3 -1 3 3 -2 5 4 -2 2 4 -1 3 3 -1 4 3 -2 2 4 -2 5 4 -2 -1 -2 -1 3 3 -1 4 3 -2 2 4 -2 5 4 -2 -1 4 3 -1 3 3 -2 5 4 -2 2 4 -1 3 3 -1 4 3 -2 2 4 -2 5 4 -2 -1 -2 -1 3 3 -1 4 3 -2 2 4 -2 5 4 -2 diff --git a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql deleted file mode 100644 index 87f0f462ab9..00000000000 --- a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql +++ /dev/null @@ -1,21 +0,0 @@ -DROP TABLE IF EXISTS test; -CREATE TABLE test (x Int8, y Int8, z Int8) ENGINE = MergeTree ORDER BY tuple(); -INSERT INTO test VALUES (1, 3, 3), (1, 4, 3), (2, 5, 4), (2, 2, 4); - -SET optimize_monotonous_functions_in_order_by = 1; -SELECT * FROM test ORDER BY toFloat32(x), -y, -z DESC; -SELECT * FROM test ORDER BY toFloat32(x), -(-y), -z DESC; -SELECT max(x) as k FROM test ORDER BY k; -SELECT roundToExp2(x) as k FROM test GROUP BY k ORDER BY k; -SELECT roundToExp2(x) as k, y, z FROM test WHERE k >= 1 ORDER BY k, y, z; -SELECT max(x) as k FROM test HAVING k > 0 ORDER BY k; - -SET optimize_monotonous_functions_in_order_by = 0; -SELECT * FROM test ORDER BY toFloat32(x), -y, -z DESC; -SELECT * FROM test ORDER BY toFloat32(x), -(-y), -z DESC; -SELECT max(x) as k FROM test ORDER BY k; -SELECT roundToExp2(x) as k From test GROUP BY k ORDER BY k; -SELECT roundToExp2(x) as k, y, z FROM test WHERE k >= 1 ORDER BY k, y, z; -SELECT max(x) as k FROM test HAVING k > 0 ORDER BY k; - -DROP TABLE test; diff --git a/tests/queries/0_stateless/01399_http_request_headers.reference b/tests/queries/0_stateless/01399_http_request_headers.reference index 92ea6606a12..2ebbe30d5d8 100644 --- a/tests/queries/0_stateless/01399_http_request_headers.reference +++ b/tests/queries/0_stateless/01399_http_request_headers.reference @@ -4,9 +4,10 @@ Code: 516 1 Code: 516 1 -Code: 516 +1 processes processes Code: 81 [1] Code: 73 +1 diff --git a/tests/queries/0_stateless/01399_http_request_headers.sh b/tests/queries/0_stateless/01399_http_request_headers.sh index f06e7ffc32b..37af501f32a 100755 --- a/tests/queries/0_stateless/01399_http_request_headers.sh +++ b/tests/queries/0_stateless/01399_http_request_headers.sh @@ -10,8 +10,9 @@ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -H 'X-ClickHouse-User: header_test' - ${CLICKHOUSE_CURL} -sS 
"${CLICKHOUSE_URL}" -H 'X-ClickHouse-Key: ' -d 'SELECT 1' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -H 'X-ClickHouse-Key: header_test' -d 'SELECT 1' | grep -o 'Code: 516' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -H 'X-ClickHouse-Quota: ' -d 'SELECT 1' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -H 'X-ClickHouse-Quota: header_test' -d 'SELECT 1' | grep -o 'Code: 516' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -H 'X-ClickHouse-Quota: header_test' -d 'SELECT 1' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -H 'X-ClickHouse-Database: system' -d 'SHOW TABLES' | grep -o 'processes' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -H 'X-ClickHouse-Database: header_test' -d 'SHOW TABLES' | grep -o 'Code: 81' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -H 'X-ClickHouse-Format: JSONCompactEachRow' -d 'SELECT 1' | grep -o '\[1\]' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -H 'X-ClickHouse-Format: header_test' -d 'SELECT 1' | grep -o 'Code: 73' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}"a_key=pingpong" -H 'X-ClickHouse-User: default' -d 'SELECT 1' diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference deleted file mode 100644 index 49b4d22ea71..00000000000 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference +++ /dev/null @@ -1,41 +0,0 @@ -SELECT - timestamp, - key -FROM test_order_by -ORDER BY timestamp ASC -LIMIT 10 -Expression (Project names) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) - ReadFromMergeTree (default.test_order_by) -SELECT - timestamp, - key -FROM test_order_by -ORDER BY toDate(timestamp) ASC -LIMIT 10 -Expression (Project names) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) - ReadFromMergeTree (default.test_order_by) -SELECT - timestamp, - key -FROM test_order_by -ORDER BY - toDate(timestamp) ASC, - timestamp ASC -LIMIT 10 -Expression (Project names) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Change column names to column identifiers))) - ReadFromMergeTree (default.test_order_by) -SELECT - timestamp, - key -FROM test_order_by -ORDER BY timestamp ASC -LIMIT 10 diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql deleted file mode 100644 index 2fe24d1662d..00000000000 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.sql +++ /dev/null @@ -1,28 +0,0 @@ -SET allow_experimental_analyzer = 1; -SET optimize_monotonous_functions_in_order_by = 1; -SET optimize_read_in_order = 1; - -DROP TABLE IF EXISTS test_order_by; - -CREATE TABLE test_order_by (timestamp DateTime, key UInt32) ENGINE=MergeTree() ORDER BY (toDate(timestamp), key); -INSERT INTO test_order_by SELECT now() + toIntervalSecond(number), number % 4 FROM numbers(10000); -OPTIMIZE TABLE test_order_by FINAL; - -EXPLAIN SYNTAX SELECT * FROM test_order_by ORDER BY timestamp LIMIT 10; -EXPLAIN PLAN SELECT * FROM test_order_by ORDER BY timestamp LIMIT 10; - -EXPLAIN SYNTAX SELECT * FROM test_order_by ORDER BY toDate(timestamp) LIMIT 10; -EXPLAIN PLAN SELECT * FROM test_order_by ORDER BY toDate(timestamp) LIMIT 
10; - -EXPLAIN SYNTAX SELECT * FROM test_order_by ORDER BY toDate(timestamp), timestamp LIMIT 10; -EXPLAIN PLAN SELECT * FROM test_order_by ORDER BY toDate(timestamp), timestamp LIMIT 10; - -DROP TABLE IF EXISTS test_order_by; - -CREATE TABLE test_order_by (timestamp DateTime, key UInt32) ENGINE=MergeTree() ORDER BY tuple(); -INSERT INTO test_order_by SELECT now() + toIntervalSecond(number), number % 4 FROM numbers(10000); -OPTIMIZE TABLE test_order_by FINAL; - -EXPLAIN SYNTAX SELECT * FROM test_order_by ORDER BY toDate(timestamp), timestamp LIMIT 10; - -DROP TABLE IF EXISTS test_order_by; diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.sql b/tests/queries/0_stateless/01576_alias_column_rewrite.sql index 1da88abb759..e741f899b98 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.sql +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.sql @@ -17,7 +17,7 @@ INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00' INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10); INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10); -set optimize_respect_aliases = 1, optimize_monotonous_functions_in_order_by = 1; +set optimize_respect_aliases = 1; SELECT 'test-partition-prune'; SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10; diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python index c62d2e9d98a..9072dfeb09f 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.python @@ -98,6 +98,8 @@ client_compwords_positive = [ "default", # system.aggregate_function_combinators "uniqCombined64ForEach", + # system.keywords + "CHANGEABLE_IN_READONLY", # FIXME: one may add separate case for suggestion_limit # system.databases "system", diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference index cf3e942adfe..78f8967263d 100644 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference @@ -9,6 +9,7 @@ test_shard_localhost: OK default_path_test: OK default: OK uniqCombined64ForEach: OK +CHANGEABLE_IN_READONLY: OK system: OK aggregate_function_combinators: OK primary_key_bytes_in_memory_allocated: OK diff --git a/tests/queries/0_stateless/02021_create_database_with_comment.sh b/tests/queries/0_stateless/02021_create_database_with_comment.sh index 8432963e059..f77397dc482 100755 --- a/tests/queries/0_stateless/02021_create_database_with_comment.sh +++ b/tests/queries/0_stateless/02021_create_database_with_comment.sh @@ -34,5 +34,4 @@ test_db_comments "Ordinary" test_db_comments "Lazy(1)" # test_db_comments "MySQL('127.0.0.1:9004', 'default', 'default', '')" # fails due to CH internal reasons # test_db_comments "SQLite('dummy_sqlitedb')" -## needs to be explicitly enabled with `SET allow_experimental_database_replicated=1` # test_db_comments "Replicated('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') ORDER BY k" diff --git a/tests/queries/0_stateless/02147_order_by_optimizations.reference b/tests/queries/0_stateless/02147_order_by_optimizations.reference deleted file mode 100644 index 8708826ea2b..00000000000 --- 
a/tests/queries/0_stateless/02147_order_by_optimizations.reference +++ /dev/null @@ -1,21 +0,0 @@ -SELECT - date, - v -FROM t_02147 -ORDER BY - toStartOfHour(date) ASC, - v ASC -SELECT - date, - v -FROM t_02147_dist -ORDER BY - toStartOfHour(date) ASC, - v ASC -SELECT - date, - v -FROM t_02147_merge -ORDER BY - toStartOfHour(date) ASC, - v ASC diff --git a/tests/queries/0_stateless/02147_order_by_optimizations.sql b/tests/queries/0_stateless/02147_order_by_optimizations.sql deleted file mode 100644 index 3925e92bffc..00000000000 --- a/tests/queries/0_stateless/02147_order_by_optimizations.sql +++ /dev/null @@ -1,19 +0,0 @@ -DROP TABLE IF EXISTS t_02147; -DROP TABLE IF EXISTS t_02147_dist; -DROP TABLE IF EXISTS t_02147_merge; - -CREATE TABLE t_02147 (date DateTime, v UInt32) -ENGINE = MergeTree ORDER BY toStartOfHour(date); - -CREATE TABLE t_02147_dist AS t_02147 ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_02147); -CREATE TABLE t_02147_merge AS t_02147 ENGINE = Merge(currentDatabase(), 't_02147'); - -SET optimize_monotonous_functions_in_order_by = 1; - -EXPLAIN SYNTAX SELECT * FROM t_02147 ORDER BY toStartOfHour(date), v; -EXPLAIN SYNTAX SELECT * FROM t_02147_dist ORDER BY toStartOfHour(date), v; -EXPLAIN SYNTAX SELECT * FROM t_02147_merge ORDER BY toStartOfHour(date), v; - -drop table t_02147; -CREATE TABLE t_02147 (date DateTime, v UInt32) ENGINE = MergeTree ORDER BY date; -select *, toString(t.v) as s from t_02147_merge as t order by date, s; diff --git a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh index 791102b9cbd..d1a3825d286 100755 --- a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh +++ b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh @@ -10,7 +10,7 @@ ${CLICKHOUSE_CLIENT} -q "create table mute_stylecheck (x UInt32) engine = Replic ${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings database_replicated_allow_only_replicated_engine=1" ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON Memory, TABLE ENGINE ON MergeTree, TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" -${CLICKHOUSE_CLIENT} --allow_experimental_database_replicated=1 --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" +${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_memory (x UInt32) engine = Memory;" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" 2>&1 | grep -o "Only tables with a Replicated engine" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" diff --git a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference deleted file mode 100644 index 
44e0be8e356..00000000000 --- a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.reference +++ /dev/null @@ -1,4 +0,0 @@ -0 -0 -0 -0 diff --git a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql b/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql deleted file mode 100644 index f0085b7660f..00000000000 --- a/tests/queries/0_stateless/02315_optimize_monotonous_functions_in_order_by_remote.sql +++ /dev/null @@ -1,10 +0,0 @@ -SET prefer_localhost_replica = 1; -SET optimize_monotonous_functions_in_order_by = 1; - -SELECT * -FROM cluster(test_cluster_two_shards_localhost, system, one) -ORDER BY toDateTime(dummy); - -SELECT * -FROM cluster(test_cluster_two_shards_localhost) -ORDER BY toDateTime(dummy) diff --git a/tests/queries/0_stateless/02400_create_table_on_cluster_normalization.sql b/tests/queries/0_stateless/02400_create_table_on_cluster_normalization.sql index 54e4ccf6762..85831a21b47 100644 --- a/tests/queries/0_stateless/02400_create_table_on_cluster_normalization.sql +++ b/tests/queries/0_stateless/02400_create_table_on_cluster_normalization.sql @@ -21,7 +21,6 @@ select * from t_l5ydey order by c_qv5rv; show create t_l5ydey; -- Correct error code if creating database with the same path as table has -set allow_experimental_database_replicated=1; create database local_t_l5ydey engine=Replicated('/clickhouse/tables/test_' || currentDatabase() || '/{shard}/local_t_l5ydey', '1', '1'); -- { serverError BAD_ARGUMENTS } drop table local_t_l5ydey; diff --git a/tests/queries/0_stateless/02445_replicated_db_alter_partition.sh b/tests/queries/0_stateless/02445_replicated_db_alter_partition.sh index e07737225db..4d9048354a1 100755 --- a/tests/queries/0_stateless/02445_replicated_db_alter_partition.sh +++ b/tests/queries/0_stateless/02445_replicated_db_alter_partition.sh @@ -8,7 +8,7 @@ db="rdb_$CLICKHOUSE_DATABASE" db2="${db}_2" db3="${db}_3" -$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" +$CLICKHOUSE_CLIENT -q "create database $db engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.mt (n int) engine=MergeTree order by tuple()" $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.rmt (n int) engine=ReplicatedMergeTree order by tuple()" @@ -16,8 +16,8 @@ $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.rmt ( $CLICKHOUSE_CLIENT -q "insert into $db.rmt values (0), (1)" $CLICKHOUSE_CLIENT -q "insert into $db.mt values (0), (1)" -$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db2 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r2')" -$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's2', 'r1')" +$CLICKHOUSE_CLIENT -q "create database $db2 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r2')" +$CLICKHOUSE_CLIENT -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's2', 'r1')" $CLICKHOUSE_CLIENT -q "alter table $db.mt drop partition id 'all', add column m int" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1 $CLICKHOUSE_CLIENT -q "alter table $db.rmt drop partition id 'all', add column m int" 2>&1| grep -Eo "not allowed to execute ALTERs of different types" | head -1 diff 
--git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 5c4604bc8cd..1604d527f2b 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) db="rdb_$CLICKHOUSE_DATABASE" $CLICKHOUSE_CLIENT -q "system flush logs" -$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" +$CLICKHOUSE_CLIENT -q "create database $db engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.t as system.query_log" # Suppress style check: current_database=$CLICKHOUSE_DATABASE $CLICKHOUSE_CLIENT -q "show tables from $db" @@ -26,8 +26,8 @@ $CLICKHOUSE_CLIENT -q "system drop database replica 's2/r1' from zkpath '/test/$ db2="${db}_2" db3="${db}_3" -$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db2 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r2')" -$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's2', 'r1')" +$CLICKHOUSE_CLIENT -q "create database $db2 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r2')" +$CLICKHOUSE_CLIENT -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's2', 'r1')" $CLICKHOUSE_CLIENT -q "system sync database replica $db" $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db' and shard_num=1 and replica_num=1" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" @@ -56,7 +56,7 @@ $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.t2 as $CLICKHOUSE_CLIENT -q "show tables from $db" db4="${db}_4" -$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db4 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" +$CLICKHOUSE_CLIENT -q "create database $db4 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" $CLICKHOUSE_CLIENT -q "system sync database replica $db4" $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db4'" diff --git a/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.sql b/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.sql index 1f35a0f6273..c4e54e3cc45 100644 --- a/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.sql +++ b/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.sql @@ -1,8 +1,6 @@ -- Tags: no-parallel DROP DATABASE IF EXISTS replicated_database_test; -SET allow_experimental_database_replicated=1; CREATE DATABASE IF NOT EXISTS replicated_database_test ENGINE = Replicated('some/path/' || currentDatabase() || '/replicated_database_test', 'shard_1', 'replica_1') SETTINGS max_broken_tables_ratio=1; SELECT engine_full FROM system.databases WHERE name = 'replicated_database_test'; -DROP DATABASE IF EXISTS replicated_database_test; - +DROP DATABASE IF EXISTS replicated_database_test; diff --git a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh 
b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh index a050f7b00d7..a0f228e6af4 100755 --- a/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh +++ b/tests/queries/0_stateless/02514_database_replicated_no_arguments_for_rmt.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CLIENT} -q "create table mute_stylecheck (x UInt32) engine = Replic ${CLICKHOUSE_CLIENT} -q "CREATE USER user_${CLICKHOUSE_DATABASE} settings database_replicated_allow_replicated_engine_arguments=0" ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" -${CLICKHOUSE_CLIENT} --allow_experimental_database_replicated=1 --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" +${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_ok (x UInt32) engine = ReplicatedMergeTree order by x;" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt_fail (x UInt32) engine = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/root/{shard}', '{replica}') order by x; -- { serverError 80 }" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" diff --git a/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql b/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql index 1bb2aaf85e6..9988eef0ad3 100644 --- a/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql +++ b/tests/queries/0_stateless/02524_fuzz_and_fuss_2.sql @@ -9,6 +9,6 @@ ENGINE = Memory; INSERT INTO data_a_02187 SELECT * FROM system.one -SETTINGS max_block_size = '1', min_insert_block_size_rows = '65536', min_insert_block_size_bytes = '0', max_insert_threads = '0', max_threads = '3', receive_timeout = '10', receive_data_timeout_ms = '10000', connections_with_failover_max_tries = '0', extremes = '1', use_uncompressed_cache = '0', optimize_move_to_prewhere = '1', optimize_move_to_prewhere_if_final = '0', replication_alter_partitions_sync = '2', totals_mode = 'before_having', allow_suspicious_low_cardinality_types = '1', compile_expressions = '1', min_count_to_compile_expression = '0', group_by_two_level_threshold = '100', distributed_aggregation_memory_efficient = '0', distributed_group_by_no_merge = '1', optimize_distributed_group_by_sharding_key = '1', optimize_skip_unused_shards = '1', optimize_skip_unused_shards_rewrite_in = '1', force_optimize_skip_unused_shards = '2', optimize_skip_unused_shards_nesting = '1', force_optimize_skip_unused_shards_nesting = '2', merge_tree_min_rows_for_concurrent_read = '10000', force_primary_key = '1', network_compression_method = 'ZSTD', network_zstd_compression_level = '7', log_queries = '0', log_queries_min_type = 'QUERY_FINISH', distributed_product_mode = 'local', insert_quorum = '2', insert_quorum_timeout = '0', insert_quorum_parallel = '0', select_sequential_consistency = '1', join_use_nulls = '1', any_join_distinct_right_table_keys = '1', preferred_max_column_in_block_size_bytes = '32', 
distributed_foreground_insert = '1', insert_allow_materialized_columns = '1', use_index_for_in_with_subqueries = '1', joined_subquery_requires_alias = '0', empty_result_for_aggregation_by_empty_set = '1', allow_suspicious_codecs = '1', query_profiler_real_time_period_ns = '0', query_profiler_cpu_time_period_ns = '0', opentelemetry_start_trace_probability = '1', max_rows_to_read = '1000000', read_overflow_mode = 'break', max_rows_to_group_by = '10', group_by_overflow_mode = 'any', max_rows_to_sort = '100', sort_overflow_mode = 'break', max_result_rows = '10', max_execution_time = '3', max_execution_speed = '1', max_bytes_in_join = '100', join_algorithm = 'partial_merge', max_memory_usage = '1099511627776', log_query_threads = '1', send_logs_level = 'fatal', enable_optimize_predicate_expression = '1', prefer_localhost_replica = '1', optimize_read_in_order = '1', optimize_aggregation_in_order = '1', read_in_order_two_level_merge_threshold = '1', allow_introspection_functions = '1', check_query_single_value_result = '1', allow_experimental_live_view = '1', default_table_engine = 'Memory', mutations_sync = '2', convert_query_to_cnf = '0', optimize_arithmetic_operations_in_aggregate_functions = '1', optimize_duplicate_order_by_and_distinct = '0', optimize_multiif_to_if = '0', optimize_monotonous_functions_in_order_by = '1', optimize_functions_to_subcolumns = '1', optimize_using_constraints = '1', optimize_substitute_columns = '1', optimize_append_index = '1', transform_null_in = '1', data_type_default_nullable = '1', cast_keep_nullable = '1', cast_ipv4_ipv6_default_on_conversion_error = '0', system_events_show_zero_values = '1', enable_global_with_statement = '1', optimize_on_insert = '0', optimize_rewrite_sum_if_to_count_if = '1', distributed_ddl_output_mode = 'throw', union_default_mode = 'ALL', optimize_aggregators_of_group_by_keys = '1', optimize_group_by_function_keys = '1', short_circuit_function_evaluation = 'enable', async_insert = '1', enable_filesystem_cache = '0', allow_deprecated_database_ordinary = '1', allow_deprecated_syntax_for_merge_tree = '1', allow_experimental_nlp_functions = '1', allow_experimental_object_type = '1', allow_experimental_map_type = '1', optimize_use_projections = '1', input_format_null_as_default = '1', input_format_ipv4_default_on_conversion_error = '0', input_format_ipv6_default_on_conversion_error = '0', output_format_json_named_tuples_as_objects = '1', output_format_write_statistics = '0', output_format_pretty_row_numbers = '1'; +SETTINGS max_block_size = '1', min_insert_block_size_rows = '65536', min_insert_block_size_bytes = '0', max_insert_threads = '0', max_threads = '3', receive_timeout = '10', receive_data_timeout_ms = '10000', connections_with_failover_max_tries = '0', extremes = '1', use_uncompressed_cache = '0', optimize_move_to_prewhere = '1', optimize_move_to_prewhere_if_final = '0', replication_alter_partitions_sync = '2', totals_mode = 'before_having', allow_suspicious_low_cardinality_types = '1', compile_expressions = '1', min_count_to_compile_expression = '0', group_by_two_level_threshold = '100', distributed_aggregation_memory_efficient = '0', distributed_group_by_no_merge = '1', optimize_distributed_group_by_sharding_key = '1', optimize_skip_unused_shards = '1', optimize_skip_unused_shards_rewrite_in = '1', force_optimize_skip_unused_shards = '2', optimize_skip_unused_shards_nesting = '1', force_optimize_skip_unused_shards_nesting = '2', merge_tree_min_rows_for_concurrent_read = '10000', force_primary_key = '1', network_compression_method 
= 'ZSTD', network_zstd_compression_level = '7', log_queries = '0', log_queries_min_type = 'QUERY_FINISH', distributed_product_mode = 'local', insert_quorum = '2', insert_quorum_timeout = '0', insert_quorum_parallel = '0', select_sequential_consistency = '1', join_use_nulls = '1', any_join_distinct_right_table_keys = '1', preferred_max_column_in_block_size_bytes = '32', distributed_foreground_insert = '1', insert_allow_materialized_columns = '1', use_index_for_in_with_subqueries = '1', joined_subquery_requires_alias = '0', empty_result_for_aggregation_by_empty_set = '1', allow_suspicious_codecs = '1', query_profiler_real_time_period_ns = '0', query_profiler_cpu_time_period_ns = '0', opentelemetry_start_trace_probability = '1', max_rows_to_read = '1000000', read_overflow_mode = 'break', max_rows_to_group_by = '10', group_by_overflow_mode = 'any', max_rows_to_sort = '100', sort_overflow_mode = 'break', max_result_rows = '10', max_execution_time = '3', max_execution_speed = '1', max_bytes_in_join = '100', join_algorithm = 'partial_merge', max_memory_usage = '1099511627776', log_query_threads = '1', send_logs_level = 'fatal', enable_optimize_predicate_expression = '1', prefer_localhost_replica = '1', optimize_read_in_order = '1', optimize_aggregation_in_order = '1', read_in_order_two_level_merge_threshold = '1', allow_introspection_functions = '1', check_query_single_value_result = '1', allow_experimental_live_view = '1', default_table_engine = 'Memory', mutations_sync = '2', convert_query_to_cnf = '0', optimize_arithmetic_operations_in_aggregate_functions = '1', optimize_duplicate_order_by_and_distinct = '0', optimize_multiif_to_if = '0', optimize_functions_to_subcolumns = '1', optimize_using_constraints = '1', optimize_substitute_columns = '1', optimize_append_index = '1', transform_null_in = '1', data_type_default_nullable = '1', cast_keep_nullable = '1', cast_ipv4_ipv6_default_on_conversion_error = '0', system_events_show_zero_values = '1', enable_global_with_statement = '1', optimize_on_insert = '0', optimize_rewrite_sum_if_to_count_if = '1', distributed_ddl_output_mode = 'throw', union_default_mode = 'ALL', optimize_aggregators_of_group_by_keys = '1', optimize_group_by_function_keys = '1', short_circuit_function_evaluation = 'enable', async_insert = '1', enable_filesystem_cache = '0', allow_deprecated_database_ordinary = '1', allow_deprecated_syntax_for_merge_tree = '1', allow_experimental_nlp_functions = '1', allow_experimental_object_type = '1', allow_experimental_map_type = '1', optimize_use_projections = '1', input_format_null_as_default = '1', input_format_ipv4_default_on_conversion_error = '0', input_format_ipv6_default_on_conversion_error = '0', output_format_json_named_tuples_as_objects = '1', output_format_write_statistics = '0', output_format_pretty_row_numbers = '1'; DROP TABLE data_a_02187; diff --git a/tests/queries/0_stateless/02551_obfuscator_keywords.reference b/tests/queries/0_stateless/02551_obfuscator_keywords.reference index fc1d5e81bc8..94370f7112c 100644 --- a/tests/queries/0_stateless/02551_obfuscator_keywords.reference +++ b/tests/queries/0_stateless/02551_obfuscator_keywords.reference @@ -1,3 +1,3 @@ select 1 order by 1 with fill step 1 -SELECT id, untuple(id) FROM id +SELECT id, mannWhitneyUTest(id) FROM id SELECT 1 IS NULL diff --git a/tests/queries/0_stateless/02551_obfuscator_keywords.sh b/tests/queries/0_stateless/02551_obfuscator_keywords.sh index a17ad670f0a..342ebeab910 100755 --- a/tests/queries/0_stateless/02551_obfuscator_keywords.sh +++ 
b/tests/queries/0_stateless/02551_obfuscator_keywords.sh @@ -7,5 +7,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) obf="$CLICKHOUSE_FORMAT --obfuscate" echo "select 1 order by 1 with fill step 1" | $obf -echo "SELECT id, untuple(id) FROM id" | $obf +echo "SELECT id, mannWhitneyUTest(id) FROM id" | $obf echo "SELECT 1 IS NULL" | $obf diff --git a/tests/queries/0_stateless/02555_davengers_rename_chain.reference b/tests/queries/0_stateless/02555_davengers_rename_chain.reference index a9fc4b395e2..6512857f2ba 100644 --- a/tests/queries/0_stateless/02555_davengers_rename_chain.reference +++ b/tests/queries/0_stateless/02555_davengers_rename_chain.reference @@ -1,26 +1,38 @@ +ALTER TABLE wrong_metadata RENAME COLUMN a TO a1, RENAME COLUMN b to b1 SETTINGS replication_alter_partitions_sync = 0; +SELECT * FROM wrong_metadata ORDER BY a1 FORMAT JSONEachRow; {"a1":"1","b1":"2","c":"3"} -~~~~~~~ +INSERT INTO wrong_metadata VALUES (4, 5, 6); +SELECT * FROM wrong_metadata ORDER BY a1 FORMAT JSONEachRow; {"a1":"1","b1":"2","c":"3"} {"a1":"4","b1":"5","c":"6"} -~~~~~~~ +ALTER TABLE wrong_metadata RENAME COLUMN a1 TO b, RENAME COLUMN b1 to a SETTINGS replication_alter_partitions_sync = 0; +INSERT INTO wrong_metadata VALUES (7, 8, 9); +SELECT * FROM wrong_metadata ORDER by a1 FORMAT JSONEachRow; {"a1":"1","b1":"2","c":"3"} {"a1":"4","b1":"5","c":"6"} {"a1":"7","b1":"8","c":"9"} -~~~~~~~ +SYSTEM START MERGES wrong_metadata; +SYSTEM SYNC REPLICA wrong_metadata; +SELECT * FROM wrong_metadata order by a FORMAT JSONEachRow; {"b":"1","a":"2","c":"3"} {"b":"4","a":"5","c":"6"} {"b":"7","a":"8","c":"9"} -~~~~~~~ +ALTER TABLE wrong_metadata_compact RENAME COLUMN a TO a1, RENAME COLUMN b to b1 SETTINGS replication_alter_partitions_sync = 0; +SELECT * FROM wrong_metadata_compact ORDER BY a1 FORMAT JSONEachRow; {"a1":"1","b1":"2","c":"3"} -~~~~~~~ +INSERT INTO wrong_metadata_compact VALUES (4, 5, 6); +SELECT * FROM wrong_metadata_compact ORDER BY a1 FORMAT JSONEachRow; {"a1":"1","b1":"2","c":"3"} {"a1":"4","b1":"5","c":"6"} -~~~~~~~ +ALTER TABLE wrong_metadata_compact RENAME COLUMN a1 TO b, RENAME COLUMN b1 to a SETTINGS replication_alter_partitions_sync = 0; +INSERT INTO wrong_metadata_compact VALUES (7, 8, 9); +SELECT * FROM wrong_metadata_compact ORDER by a1 FORMAT JSONEachRow; {"a1":"1","b1":"2","c":"3"} {"a1":"4","b1":"5","c":"6"} {"a1":"7","b1":"8","c":"9"} -~~~~~~~ +SYSTEM START MERGES wrong_metadata_compact; +SYSTEM SYNC REPLICA wrong_metadata_compact; +SELECT * FROM wrong_metadata_compact order by a FORMAT JSONEachRow; {"b":"1","a":"2","c":"3"} {"b":"4","a":"5","c":"6"} {"b":"7","a":"8","c":"9"} -~~~~~~~ diff --git a/tests/queries/0_stateless/02555_davengers_rename_chain.sh b/tests/queries/0_stateless/02555_davengers_rename_chain.sh index b23f8085fd7..660a95846c4 100755 --- a/tests/queries/0_stateless/02555_davengers_rename_chain.sh +++ b/tests/queries/0_stateless/02555_davengers_rename_chain.sh @@ -3,141 +3,101 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh +# shellcheck source=./mergetree_mutations.lib +. 
"$CUR_DIR"/mergetree_mutations.lib -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS wrong_metadata" +function wait_column() +{ + local table=$1 && shift + local column=$1 && shift -$CLICKHOUSE_CLIENT -n --query="CREATE TABLE wrong_metadata( - a UInt64, - b UInt64, - c UInt64 -) -ENGINE ReplicatedMergeTree('/test/{database}/tables/wrong_metadata', '1') -ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 0" + for _ in {1..60}; do + result=$($CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE $table") + if [[ $result == *"$column"* ]]; then + return 0 + fi + sleep 0.1 + done -$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata VALUES (1, 2, 3)" + echo "[$table] Cannot wait for column to appear" >&2 + return 1 +} +function wait_mutation_loaded() +{ + local table=$1 && shift + local expr=$1 && shift + for _ in {1..60}; do + result=$($CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE table = '$table' AND database='$CLICKHOUSE_DATABASE'") + if [[ $result == *"$expr"* ]]; then + return 0 + fi + sleep 0.1 + done -$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES wrong_metadata" + echo "[$table] Cannot wait mutation $expr" >&2 + return 1 +} +declare -A tables +tables["wrong_metadata"]="min_bytes_for_wide_part = 0" +tables["wrong_metadata_compact"]="min_bytes_for_wide_part = 10000000" -$CLICKHOUSE_CLIENT --query="ALTER TABLE wrong_metadata RENAME COLUMN a TO a1, RENAME COLUMN b to b1 SETTINGS replication_alter_partitions_sync = 0" +for table in "${!tables[@]}"; do + settings="${tables[$table]}" -counter=0 retries=60 -I=0 -while [[ $counter -lt $retries ]]; do - I=$((I + 1)) - result=$($CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE wrong_metadata") - if [[ $result == *"\`a1\` UInt64"* ]]; then - break; - fi - sleep 0.1 - ((++counter)) -done + $CLICKHOUSE_CLIENT -n --query=" + DROP TABLE IF EXISTS $table; + CREATE TABLE $table( + a UInt64, + b UInt64, + c UInt64 + ) + ENGINE ReplicatedMergeTree('/test/{database}/tables/$table', '1') + ORDER BY tuple() + SETTINGS $settings; -$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata ORDER BY a1 FORMAT JSONEachRow" + INSERT INTO $table VALUES (1, 2, 3); + SYSTEM STOP MERGES $table; -$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + -- { echoOn } + SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; -$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata VALUES (4, 5, 6)" + ALTER TABLE $table RENAME COLUMN a TO a1, RENAME COLUMN b to b1 SETTINGS replication_alter_partitions_sync = 0; + " + wait_column "$table" "\`a1\` UInt64" || exit 2 -$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata ORDER BY a1 FORMAT JSONEachRow" -$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + $CLICKHOUSE_CLIENT -n --query=" + -- { echoOn } + SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; + SELECT * FROM $table ORDER BY a1 FORMAT JSONEachRow; + INSERT INTO $table VALUES (4, 5, 6); + SELECT * FROM $table ORDER BY a1 FORMAT JSONEachRow; -$CLICKHOUSE_CLIENT --query="ALTER TABLE wrong_metadata RENAME COLUMN a1 TO b, RENAME COLUMN b1 to a SETTINGS replication_alter_partitions_sync = 0" + ALTER TABLE $table RENAME COLUMN a1 TO b, RENAME COLUMN b1 to a SETTINGS replication_alter_partitions_sync = 0; + " -counter=0 retries=60 -I=0 -while [[ $counter -lt $retries ]]; do - I=$((I + 1)) - result=$($CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE table = 'wrong_metadata' AND database='${CLICKHOUSE_DATABASE}'") - if [[ $result == *"b1 TO a"* ]]; then - break; - fi - sleep 0.1 - ((++counter)) -done + wait_mutation_loaded "$table" "b1 TO a" || exit 2 -$CLICKHOUSE_CLIENT --query="INSERT 
INTO wrong_metadata VALUES (7, 8, 9)" + $CLICKHOUSE_CLIENT -n --query=" + -- { echoOn } + SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; -$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata ORDER by a1 FORMAT JSONEachRow" -$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" + INSERT INTO $table VALUES (7, 8, 9); + SELECT * FROM $table ORDER by a1 FORMAT JSONEachRow; + SYSTEM START MERGES $table; + SYSTEM SYNC REPLICA $table; + " -$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES wrong_metadata" + wait_for_all_mutations "$table" -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA wrong_metadata" + $CLICKHOUSE_CLIENT -n --query=" + -- { echoOn } + SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null; -$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata order by a FORMAT JSONEachRow" - -$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" - - -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS wrong_metadata" - -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS wrong_metadata_compact" - -$CLICKHOUSE_CLIENT -n --query="CREATE TABLE wrong_metadata_compact( - a UInt64, - b UInt64, - c UInt64 -) -ENGINE ReplicatedMergeTree('/test/{database}/tables/wrong_metadata_compact', '1') -ORDER BY tuple() -SETTINGS min_bytes_for_wide_part = 10000000" - -$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata_compact VALUES (1, 2, 3)" - -$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES wrong_metadata_compact" - -$CLICKHOUSE_CLIENT --query="ALTER TABLE wrong_metadata_compact RENAME COLUMN a TO a1, RENAME COLUMN b to b1 SETTINGS replication_alter_partitions_sync = 0" - -counter=0 retries=60 -I=0 -while [[ $counter -lt $retries ]]; do - I=$((I + 1)) - result=$($CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE wrong_metadata_compact") - if [[ $result == *"\`a1\` UInt64"* ]]; then - break; - fi - sleep 0.1 - ((++counter)) -done - -$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata_compact ORDER BY a1 FORMAT JSONEachRow" -$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" - -$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata_compact VALUES (4, 5, 6)" - -$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata_compact ORDER BY a1 FORMAT JSONEachRow" -$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" - -$CLICKHOUSE_CLIENT --query="ALTER TABLE wrong_metadata_compact RENAME COLUMN a1 TO b, RENAME COLUMN b1 to a SETTINGS replication_alter_partitions_sync = 0" - -counter=0 retries=60 -I=0 -while [[ $counter -lt $retries ]]; do - I=$((I + 1)) - result=$($CLICKHOUSE_CLIENT --query "SELECT * FROM system.mutations WHERE table = 'wrong_metadata_compact' AND database='${CLICKHOUSE_DATABASE}'") - if [[ $result == *"b1 TO a"* ]]; then - break; - fi - sleep 0.1 - ((++counter)) -done - -$CLICKHOUSE_CLIENT --query="INSERT INTO wrong_metadata_compact VALUES (7, 8, 9)" - -$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata_compact ORDER by a1 FORMAT JSONEachRow" -$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" - -$CLICKHOUSE_CLIENT --query="SYSTEM START MERGES wrong_metadata_compact" - -$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA wrong_metadata_compact" - -$CLICKHOUSE_CLIENT --query="SELECT * FROM wrong_metadata_compact order by a FORMAT JSONEachRow" -$CLICKHOUSE_CLIENT --query="SELECT '~~~~~~~'" - -$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS wrong_metadata_compact" + SELECT * FROM $table order by a FORMAT JSONEachRow; + " +done |& grep -v -F -x -e '-- { echoOn }' -e " SELECT 'ECHO_ALIGNMENT_FIX' FORMAT Null;" diff --git a/tests/queries/0_stateless/02710_default_replicated_parameters.sql b/tests/queries/0_stateless/02710_default_replicated_parameters.sql 
index 279b7e81bdd..faeea6cdf02 100644 --- a/tests/queries/0_stateless/02710_default_replicated_parameters.sql +++ b/tests/queries/0_stateless/02710_default_replicated_parameters.sql @@ -1,7 +1,5 @@ -- Tags: no-parallel -SET allow_experimental_database_replicated=1; - DROP DATABASE IF EXISTS replicated_database_params; CREATE DATABASE replicated_database_params ENGINE = Replicated('some/path/' || currentDatabase() || '/replicated_database_params'); diff --git a/tests/queries/0_stateless/02762_replicated_database_no_args.sql b/tests/queries/0_stateless/02762_replicated_database_no_args.sql index 1409a059b02..b811bb7d428 100644 --- a/tests/queries/0_stateless/02762_replicated_database_no_args.sql +++ b/tests/queries/0_stateless/02762_replicated_database_no_args.sql @@ -1,4 +1,3 @@ -- Tags: no-parallel -set allow_experimental_database_replicated=1; create database replicated_db_no_args engine=Replicated; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index c295f5be43b..8a6904b6bd7 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --allow_experimental_database_replicated=1 --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" +${CLICKHOUSE_CLIENT} --query "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" # Non-replicated engines are allowed ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test (id UInt64) ENGINE = MergeTree() ORDER BY id AS SELECT 1" # Replicated storafes are forbidden diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference index 6ed281c757a..d00491fd7e5 100644 --- a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference +++ b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference @@ -1,2 +1 @@ 1 -1 diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh deleted file mode 100755 index 98575540923..00000000000 --- a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -TABLE="03000_traverse_shadow_system_data_path_table" -BACKUP="03000_traverse_shadow_system_data_path_backup" - -${CLICKHOUSE_CLIENT} --query="CREATE TABLE ${TABLE} ( - id Int64, - data String -) ENGINE=MergeTree() -ORDER BY id -SETTINGS storage_policy='s3_cache';" - -${CLICKHOUSE_CLIENT} --query="INSERT INTO ${TABLE} VALUES (0, 'data');" -${CLICKHOUSE_CLIENT} --query "SELECT count() > 0 FROM system.remote_data_paths WHERE disk_name = 's3_cache'" - -${CLICKHOUSE_CLIENT} --query="ALTER TABLE ${TABLE} FREEZE WITH NAME '${BACKUP}';" -${CLICKHOUSE_CLIENT} --query="DROP TABLE ${TABLE} SYNC;" - -${CLICKHOUSE_CLIENT} --query " - SELECT count() > 0 - FROM system.remote_data_paths - WHERE disk_name = 's3_cache' AND local_path LIKE '%shadow/${BACKUP}%' - SETTINGS traverse_shadow_remote_data_paths=1;" -${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME '${BACKUP}';" &>/dev/null || true diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sql b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sql new file mode 100644 index 00000000000..514b4227b71 --- /dev/null +++ b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sql @@ -0,0 +1,20 @@ +-- Tags: no-replicated-database, no-fasttest + +DROP TABLE IF EXISTS 03000_traverse_shadow_system_data_path_table; + +CREATE TABLE 03000_traverse_shadow_system_data_path_table ( + id Int64, + data String +) ENGINE=MergeTree() +ORDER BY id +SETTINGS storage_policy='s3_cache'; + +INSERT INTO 03000_traverse_shadow_system_data_path_table VALUES (0, 'data'); +ALTER TABLE 03000_traverse_shadow_system_data_path_table FREEZE WITH NAME '03000_traverse_shadow_system_data_path_table_backup'; + +SELECT count() > 0 +FROM system.remote_data_paths +WHERE disk_name = 's3_cache' AND local_path LIKE '%shadow/03000_traverse_shadow_system_data_path_table_backup%' +SETTINGS traverse_shadow_remote_data_paths=1; + +DROP TABLE IF EXISTS 03000_traverse_shadow_system_data_path_table; diff --git a/tests/queries/0_stateless/03003_count_asterisk_filter.reference b/tests/queries/0_stateless/03003_count_asterisk_filter.reference new file mode 100644 index 00000000000..be589c9ceb0 --- /dev/null +++ b/tests/queries/0_stateless/03003_count_asterisk_filter.reference @@ -0,0 +1,4 @@ +1 +1 +2 +2 diff --git a/tests/queries/0_stateless/03003_count_asterisk_filter.sql b/tests/queries/0_stateless/03003_count_asterisk_filter.sql new file mode 100644 index 00000000000..9bd10dfae25 --- /dev/null +++ b/tests/queries/0_stateless/03003_count_asterisk_filter.sql @@ -0,0 +1,13 @@ +CREATE TABLE users (uid Int16, name Nullable(String), age Int16) ENGINE=Memory; + +INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users VALUES (6666, Null, 48); +INSERT INTO users VALUES (8888, 'Alice', 50); + +SELECT count(name) FILTER (WHERE uid > 2000) FROM users; +SELECT countIf(name, uid > 2000) FROM users; + +SELECT count(*) FILTER (WHERE uid > 2000) FROM users; +SELECT countIf(uid > 2000) FROM users; + +DROP TABLE users; diff --git a/tests/queries/0_stateless/03033_dist_settings.optimize_skip_unused_shards_rewrite_in_composite_sharding_key.reference b/tests/queries/0_stateless/03033_dist_settings.optimize_skip_unused_shards_rewrite_in_composite_sharding_key.reference new file mode 100644 index 00000000000..93f0f004541 --- /dev/null +++ b/tests/queries/0_stateless/03033_dist_settings.optimize_skip_unused_shards_rewrite_in_composite_sharding_key.reference @@ -0,0 +1,10 @@ +-- { echoOn } +SELECT shardNum(), count() FROM dt WHERE 
(tag_id, tag_name) IN ((1, 'foo1'), (1, 'foo2')) GROUP BY 1 ORDER BY 1; +1 2 +2 2 +SELECT shardNum(), count() FROM dt WHERE tag_id IN (1, 1) AND tag_name IN ('foo1', 'foo2') GROUP BY 1 ORDER BY 1; +1 2 +2 2 +SELECT shardNum(), count() FROM dt WHERE tag_id = 1 AND tag_name IN ('foo1', 'foo2') GROUP BY 1 ORDER BY 1; +1 2 +2 2 diff --git a/tests/queries/0_stateless/03033_dist_settings.optimize_skip_unused_shards_rewrite_in_composite_sharding_key.sql b/tests/queries/0_stateless/03033_dist_settings.optimize_skip_unused_shards_rewrite_in_composite_sharding_key.sql new file mode 100644 index 00000000000..b68fc27722e --- /dev/null +++ b/tests/queries/0_stateless/03033_dist_settings.optimize_skip_unused_shards_rewrite_in_composite_sharding_key.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS dt; + +CREATE TABLE t (tag_id UInt64, tag_name String) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE dt AS t ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), 't', cityHash64(concat(tag_id, tag_name))); + +INSERT INTO dt SETTINGS distributed_foreground_insert=1 VALUES (1, 'foo1'); -- shard0 +INSERT INTO dt SETTINGS distributed_foreground_insert=1 VALUES (1, 'foo2'); -- shard1 + +SET optimize_skip_unused_shards=1, optimize_skip_unused_shards_rewrite_in=1; +-- { echoOn } +SELECT shardNum(), count() FROM dt WHERE (tag_id, tag_name) IN ((1, 'foo1'), (1, 'foo2')) GROUP BY 1 ORDER BY 1; +SELECT shardNum(), count() FROM dt WHERE tag_id IN (1, 1) AND tag_name IN ('foo1', 'foo2') GROUP BY 1 ORDER BY 1; +SELECT shardNum(), count() FROM dt WHERE tag_id = 1 AND tag_name IN ('foo1', 'foo2') GROUP BY 1 ORDER BY 1; diff --git a/tests/queries/0_stateless/03033_from_unixtimestamp_joda_by_int64.reference b/tests/queries/0_stateless/03033_from_unixtimestamp_joda_by_int64.reference new file mode 100644 index 00000000000..946dd358dc9 --- /dev/null +++ b/tests/queries/0_stateless/03033_from_unixtimestamp_joda_by_int64.reference @@ -0,0 +1 @@ +2295-03-20 00:03:16 diff --git a/tests/queries/0_stateless/03033_from_unixtimestamp_joda_by_int64.sql b/tests/queries/0_stateless/03033_from_unixtimestamp_joda_by_int64.sql new file mode 100644 index 00000000000..69d898d303e --- /dev/null +++ b/tests/queries/0_stateless/03033_from_unixtimestamp_joda_by_int64.sql @@ -0,0 +1 @@ +select fromUnixTimestampInJodaSyntax(10262736196, 'YYYY-MM-dd HH:mm:ss', 'Asia/Shanghai'); \ No newline at end of file diff --git a/tests/queries/0_stateless/03093_bug_gcd_codec.reference b/tests/queries/0_stateless/03093_bug_gcd_codec.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03093_bug_gcd_codec.sql b/tests/queries/0_stateless/03093_bug_gcd_codec.sql new file mode 100644 index 00000000000..4a8370c6bc8 --- /dev/null +++ b/tests/queries/0_stateless/03093_bug_gcd_codec.sql @@ -0,0 +1,17 @@ +-- Tags: long + +CREATE TABLE test_gcd(test_col UInt32 CODEC(GCD, LZ4)) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192, index_granularity_bytes = 1024; + +INSERT INTO test_gcd SELECT floor(randUniform(1, 3)) FROM numbers(150000); +OPTIMIZE TABLE test_gcd FINAL; + +CREATE TABLE test_gcd2(test_col UInt32 CODEC(GCD, LZ4)) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192, index_granularity_bytes = 1024, min_bytes_for_wide_part = 0, max_compress_block_size = 1024, min_compress_block_size = 1024; + +INSERT INTO test_gcd2 SELECT floor(randUniform(1, 3)) FROM numbers(150000); +OPTIMIZE TABLE test_gcd2 FINAL; diff --git 
a/tests/queries/0_stateless/03132_sqlancer_union_all.reference b/tests/queries/0_stateless/03132_sqlancer_union_all.reference new file mode 100644 index 00000000000..71963122477 --- /dev/null +++ b/tests/queries/0_stateless/03132_sqlancer_union_all.reference @@ -0,0 +1,2 @@ +278926179 +278926179 diff --git a/tests/queries/0_stateless/03132_sqlancer_union_all.sql b/tests/queries/0_stateless/03132_sqlancer_union_all.sql new file mode 100644 index 00000000000..576da9e53d5 --- /dev/null +++ b/tests/queries/0_stateless/03132_sqlancer_union_all.sql @@ -0,0 +1,30 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS t0; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; +DROP TABLE IF EXISTS t4; + +CREATE TABLE t0 (c0 String) ENGINE = Memory() ; +CREATE TABLE IF NOT EXISTS t1 (c0 Int32, c1 Int32, c2 ALIAS c1) ENGINE = Log() ; +CREATE TABLE t2 (c0 Int32) ENGINE = MergeTree() ORDER BY tuple() ; +CREATE TABLE t3 (c0 String) ENGINE = Memory() ; +CREATE TABLE t4 (c0 Int32) ENGINE = Memory() ; +INSERT INTO t4(c0) VALUES (-405831124); +INSERT INTO t1(c1, c0) VALUES (278926179, 891140511); +INSERT INTO t4(c0) VALUES (1586457527); +INSERT INTO t3(c0) VALUES ('?/|D!6 '), ('1586457527'); +INSERT INTO t2(c0) VALUES (1475250982); + +SELECT t1.c1 +FROM t3, t1 +WHERE true AND t1.c2 +UNION ALL +SELECT t1.c1 +FROM t3, t1 +WHERE NOT t1.c2 +UNION ALL +SELECT t1.c1 +FROM t3, t1 +WHERE t1.c2 IS NULL; diff --git a/tests/queries/0_stateless/03133_help_message_verbosity.reference b/tests/queries/0_stateless/03133_help_message_verbosity.reference new file mode 100644 index 00000000000..2c94e483710 --- /dev/null +++ b/tests/queries/0_stateless/03133_help_message_verbosity.reference @@ -0,0 +1,2 @@ +OK +OK diff --git a/tests/queries/0_stateless/03133_help_message_verbosity.sh b/tests/queries/0_stateless/03133_help_message_verbosity.sh new file mode 100755 index 00000000000..cdf01d4de2d --- /dev/null +++ b/tests/queries/0_stateless/03133_help_message_verbosity.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +# Checks that "clickhouse-client/local --help" prints a brief summary of CLI arguments and "--help --verbose" prints all possible CLI arguments +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Unique identifier for concurrent execution +PID=$$ + +# Get the help message in short and verbose form and put them into txt files +$CLICKHOUSE_CLIENT --help > "help_msg_$PID.txt" +$CLICKHOUSE_CLIENT --help --verbose > "verbose_help_msg_$PID.txt" + +# Sizes of files +size_short=$(stat -c %s "help_msg_$PID.txt") +size_verbose=$(stat -c %s "verbose_help_msg_$PID.txt") + +# If the size of the short help message is less, everything is OK +if [ $size_short -lt $size_verbose ]; then + echo "OK" +else + echo "Not OK" +fi + +rm "help_msg_$PID.txt" +rm "verbose_help_msg_$PID.txt" + +# The same for clickhouse local +$CLICKHOUSE_LOCAL --help > "help_msg_$PID.txt" +$CLICKHOUSE_LOCAL --help --verbose > "verbose_help_msg_$PID.txt" + +size_short=$(stat -c %s "help_msg_$PID.txt") +size_verbose=$(stat -c %s "verbose_help_msg_$PID.txt") + +if [ $size_short -lt $size_verbose ]; then + echo "OK" +else + echo "Not OK" +fi + +rm "help_msg_$PID.txt" +rm "verbose_help_msg_$PID.txt" diff --git a/tests/queries/0_stateless/mergetree_mutations.lib b/tests/queries/0_stateless/mergetree_mutations.lib index 7a27200c523..b11b2e6b852 100644 --- a/tests/queries/0_stateless/mergetree_mutations.lib +++ b/tests/queries/0_stateless/mergetree_mutations.lib @@ -29,7 +29,6 @@ function wait_for_all_mutations() for i in {1..200} do - sleep 1 if [[ $(${CLICKHOUSE_CLIENT} --query="SELECT coalesce(minOrNull(is_done), 1) FROM system.mutations WHERE database='$database' AND table like '$table'") -eq 1 ]]; then break fi @@ -38,6 +37,7 @@ function wait_for_all_mutations() echo "Timed out while waiting for mutation to execute!" fi + sleep 0.1 done } diff --git a/tests/tsan_ignorelist.txt b/tests/tsan_ignorelist.txt index b565cc91411..96bf6e4251f 100644 --- a/tests/tsan_ignorelist.txt +++ b/tests/tsan_ignorelist.txt @@ -3,8 +3,13 @@ # [1]: https://clang.llvm.org/docs/SanitizerSpecialCaseList.html # [2]: https://github.com/google/sanitizers/wiki/ThreadSanitizerSuppressions # - +# Caveats for generic entry "fun": +# - does not work for __attribute__((__always_inline__)) +# - requires asterisk at the beginning *and* end for static functions +# +[thread] # https://github.com/ClickHouse/ClickHouse/issues/55629 fun:rd_kafka_broker_set_nodename # https://github.com/ClickHouse/ClickHouse/issues/60443 -fun:rd_kafka_stats_emit_all +fun:*rd_avg_calc* +fun:*rd_avg_rollover* diff --git a/tests/ubsan_ignorelist.txt b/tests/ubsan_ignorelist.txt index e89127507c0..57d6598afa6 100644 --- a/tests/ubsan_ignorelist.txt +++ b/tests/ubsan_ignorelist.txt @@ -6,7 +6,12 @@ # See also [3] for all UBSan checks. # # [3]: https://github.com/llvm-mirror/compiler-rt/blob/master/lib/ubsan/ubsan_checks.inc - +# +# Caveats for generic entry "fun": +# - does not work for __attribute__((__always_inline__)) +# - requires asterisk at the beginning *and* end for static functions +# +[undefined] # Some value is outside the range of representable values of type 'long' on user-provided data inside boost::geometry - ignore. 
src:*/Functions/pointInPolygon.cpp src:*/contrib/boost/boost/geometry/* diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 6a95a68dea5..e7477ffc5e1 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -272,8 +272,8 @@ ExactEdgeLengthRads ExecutablePool ExtType ExternalDistributed -FFFFFFFF FFFD +FFFFFFFF FIPS FOSDEM FQDN @@ -563,8 +563,6 @@ MinIO MinMax MindsDB Mongodb -mortonDecode -mortonEncode MsgPack multiSearchAllPositionsCaseInsensitive multiSearchAllPositionsCaseInsensitiveUTF @@ -1060,6 +1058,7 @@ YYYYMMDDToDate YYYYMMDDhhmmssToDateTime Yandex Yasm +ZCurve ZSTDQAT Zabbix Zipkin @@ -1362,6 +1361,7 @@ configs congruential conjuction conjuctive +connectionId const contrib convertCharset @@ -1970,6 +1970,8 @@ moduloOrZero moduli mongodb monthName +mortonDecode +mortonEncode moscow msgpack msgpk @@ -2591,6 +2593,7 @@ toJSONString toLastDayOfMonth toLastDayOfWeek toLowCardinality +toMillisecond toMinute toModifiedJulianDay toModifiedJulianDayOrNull @@ -2607,7 +2610,6 @@ toRelativeSecondNum toRelativeWeekNum toRelativeYearNum toSecond -toMillisecond toStartOfDay toStartOfFifteenMinutes toStartOfFiveMinutes @@ -2815,7 +2817,6 @@ xz yaml yandex youtube -ZCurve zLib zLinux zabbix diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 060a0107c1e..375017c0cb0 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -33,6 +33,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.13.25-lts 2024-04-26 v23.8.12.13-lts 2024-03-26 v23.8.11.28-lts 2024-03-15 v23.8.10.43-lts 2024-03-05