diff --git a/.exrc b/.exrc new file mode 100644 index 00000000000..162bd41ce4f --- /dev/null +++ b/.exrc @@ -0,0 +1 @@ +au BufRead,BufNewFile * set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab tags=tags,../tags diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index e03e5c543c2..1b03b6fde3c 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -51,7 +51,7 @@ jobs: --gh-user-or-token="$GITHUB_TOKEN" --jobs=5 \ --output="/ClickHouse/docs/changelogs/${GITHUB_TAG}.md" "${GITHUB_TAG}" git add "./docs/changelogs/${GITHUB_TAG}.md" - python ./utils/security-generator/generate_security.py > SECURITY.md + python3 ./utils/security-generator/generate_security.py > SECURITY.md git diff HEAD - name: Create Pull Request uses: peter-evans/create-pull-request@v3 diff --git a/.gitignore b/.gitignore index 09d3f4a4e33..6d94cade384 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ # logs *.log +*.debuglog *.stderr *.stdout diff --git a/.vimrc b/.vimrc deleted file mode 100644 index ba996eb8a42..00000000000 --- a/.vimrc +++ /dev/null @@ -1,2 +0,0 @@ -au BufRead,BufNewFile ./* set tabstop=4 softtabstop=0 expandtab shiftwidth=4 smarttab tags=tags,../tags - diff --git a/contrib/NuRaft b/contrib/NuRaft index e4e746a24eb..afc36dfa9b0 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit e4e746a24eb56861a86f3672771e3308d8c40722 +Subproject commit afc36dfa9b0beb45bc4cd935060631cc80ba04a5 diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 7a034c741eb..cb23372d31f 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -295,6 +295,9 @@ if not args.use_existing_tables: reportStageEnd("create") +# Let's sync the data to avoid writeback affecting performance +os.system("sync") + # By default, test all queries.
queries_to_run = range(0, len(test_queries)) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index cb8c914e53d..e8c5e17024c 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -17,7 +17,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ shellcheck \ yamllint \ - && pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory PyGithub unidiff pylint==2.6.2 \ + && pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \ && apt-get clean \ && rm -rf /root/.cache/pip diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index 8c2110d64e5..6dc3d05d051 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -11,17 +11,19 @@ def process_result(result_folder): description = "" test_results = [] checks = ( - ("header duplicates", "duplicate_output.txt"), - ("shellcheck", "shellcheck_output.txt"), - ("style", "style_output.txt"), - ("black", "black_output.txt"), - ("typos", "typos_output.txt"), - ("whitespaces", "whitespaces_output.txt"), - ("workflows", "workflows_output.txt"), - ("doc typos", "doc_spell_output.txt"), + "duplicate includes", + "shellcheck", + "style", + "black", + "mypy", + "typos", + "whitespaces", + "workflows", + "docs spelling", ) - for name, out_file in checks: + for name in checks: + out_file = name.replace(" ", "_") + "_output.txt" full_path = os.path.join(result_folder, out_file) if not os.path.exists(full_path): logging.info("No %s check log on path %s", name, full_path) diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 06ecadbfebf..80911bf8627 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -4,15 +4,17 @@ cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv echo "Check duplicates" | ts -./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt +./check-duplicate-includes.sh |& tee /test_output/duplicate_includes_output.txt echo "Check style" | ts ./check-style -n |& tee /test_output/style_output.txt echo "Check python formatting with black" | ts ./check-black -n |& tee /test_output/black_output.txt +echo "Check python type hinting with mypy" | ts +./check-mypy -n |& tee /test_output/mypy_output.txt echo "Check typos" | ts ./check-typos |& tee /test_output/typos_output.txt echo "Check docs spelling" | ts -./check-doc-aspell |& tee /test_output/doc_spell_output.txt +./check-doc-aspell |& tee /test_output/docs_spelling_output.txt echo "Check whitespaces" | ts ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt echo "Check workflows" | ts diff --git a/docs/changelogs/v22.10.3.27-stable.md b/docs/changelogs/v22.10.3.27-stable.md new file mode 100644 index 00000000000..db49a042434 --- /dev/null +++ b/docs/changelogs/v22.10.3.27-stable.md @@ -0,0 +1,32 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.10.3.27-stable (6d3b2985724) FIXME as compared to v22.10.2.11-stable (d2bfcaba002) + +#### Improvement +* Backported in [#42842](https://github.com/ClickHouse/ClickHouse/issues/42842): Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. 
See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#42959](https://github.com/ClickHouse/ClickHouse/issues/42959): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#43042](https://github.com/ClickHouse/ClickHouse/issues/43042): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42864](https://github.com/ClickHouse/ClickHouse/issues/42864): Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#43173](https://github.com/ClickHouse/ClickHouse/issues/43173): Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#43064](https://github.com/ClickHouse/ClickHouse/issues/43064): Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#43075](https://github.com/ClickHouse/ClickHouse/issues/43075): Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#43444](https://github.com/ClickHouse/ClickHouse/issues/43444): - Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#43430](https://github.com/ClickHouse/ClickHouse/issues/43430): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix a bug in CAST function parser [#42980](https://github.com/ClickHouse/ClickHouse/pull/42980) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix TSan errors (correctly ignore _exit interception) [#43009](https://github.com/ClickHouse/ClickHouse/pull/43009) ([Azat Khuzhin](https://github.com/azat)). +* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/docs/changelogs/v22.10.4.23-stable.md b/docs/changelogs/v22.10.4.23-stable.md new file mode 100644 index 00000000000..a2b45cd9dcf --- /dev/null +++ b/docs/changelogs/v22.10.4.23-stable.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.10.4.23-stable (352772987f4) FIXME as compared to v22.10.3.27-stable (6d3b2985724) + +#### Backward Incompatible Change +* Backported in [#43487](https://github.com/ClickHouse/ClickHouse/issues/43487): Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then incompatible versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)). + +#### Build/Testing/Packaging Improvement +* Backported in [#43053](https://github.com/ClickHouse/ClickHouse/issues/43053): Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#43715](https://github.com/ClickHouse/ClickHouse/issues/43715): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#43576](https://github.com/ClickHouse/ClickHouse/issues/43576): Fix possible `Cannot create non-empty column with type Nothing` in functions if/multiIf. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#43506](https://github.com/ClickHouse/ClickHouse/issues/43506): Fix a bug when row level filter uses default value of column. 
[#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#43723](https://github.com/ClickHouse/ClickHouse/issues/43723): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix pagination issue in GITHUB_JOB_ID() [#43681](https://github.com/ClickHouse/ClickHouse/pull/43681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.11.2.30-stable.md b/docs/changelogs/v22.11.2.30-stable.md new file mode 100644 index 00000000000..a220c469f7f --- /dev/null +++ b/docs/changelogs/v22.11.2.30-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.11.2.30-stable (28f72d8ab09) FIXME as compared to v22.11.1.1360-stable (0d211ed1984) + +#### Backward Incompatible Change +* Backported in [#43488](https://github.com/ClickHouse/ClickHouse/issues/43488): Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then incompatible versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)). 
+ +#### Improvement +* Backported in [#43511](https://github.com/ClickHouse/ClickHouse/issues/43511): Restrict default access to named collections for user defined in config. It must have explicit `show_named_collections=1` to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#43716](https://github.com/ClickHouse/ClickHouse/issues/43716): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#43431](https://github.com/ClickHouse/ClickHouse/issues/43431): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#43577](https://github.com/ClickHouse/ClickHouse/issues/43577): Fix possible `Cannot create non-empty column with type Nothing` in functions if/multiIf. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#43507](https://github.com/ClickHouse/ClickHouse/issues/43507): Fix a bug when row level filter uses default value of column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#43724](https://github.com/ClickHouse/ClickHouse/issues/43724): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#43807](https://github.com/ClickHouse/ClickHouse/issues/43807): Optimized number of List requests to ZooKeeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix pagination issue in GITHUB_JOB_ID() [#43681](https://github.com/ClickHouse/ClickHouse/pull/43681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/docs/changelogs/v22.3.15.33-lts.md b/docs/changelogs/v22.3.15.33-lts.md new file mode 100644 index 00000000000..8f7e9442406 --- /dev/null +++ b/docs/changelogs/v22.3.15.33-lts.md @@ -0,0 +1,34 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.3.15.33-lts (4ef30f2c4b6) FIXME as compared to v22.3.14.23-lts (74956bfee4d) + +#### Backward Incompatible Change +* Backported in [#43484](https://github.com/ClickHouse/ClickHouse/issues/43484): Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then incompatible versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)). + +#### Improvement +* Backported in [#42839](https://github.com/ClickHouse/ClickHouse/issues/42839): Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#43050](https://github.com/ClickHouse/ClickHouse/issues/43050): Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#42963](https://github.com/ClickHouse/ClickHouse/issues/42963): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Backported in [#43039](https://github.com/ClickHouse/ClickHouse/issues/43039): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#43427](https://github.com/ClickHouse/ClickHouse/issues/43427): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#43720](https://github.com/ClickHouse/ClickHouse/issues/43720): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.8.10.29-lts.md b/docs/changelogs/v22.8.10.29-lts.md new file mode 100644 index 00000000000..8f866d2aa40 --- /dev/null +++ b/docs/changelogs/v22.8.10.29-lts.md @@ -0,0 +1,32 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.8.10.29-lts (d568a57f7af) FIXME as compared to v22.8.9.24-lts (a1b69551d40) + +#### Backward Incompatible Change +* Backported in [#43485](https://github.com/ClickHouse/ClickHouse/issues/43485): Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. 
Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then incompatible versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)). + +#### Build/Testing/Packaging Improvement +* Backported in [#43051](https://github.com/ClickHouse/ClickHouse/issues/43051): Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#43513](https://github.com/ClickHouse/ClickHouse/issues/43513): - Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#43428](https://github.com/ClickHouse/ClickHouse/issues/43428): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#43580](https://github.com/ClickHouse/ClickHouse/issues/43580): Fix a bug when row level filter uses default value of column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#43721](https://github.com/ClickHouse/ClickHouse/issues/43721): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix 02267_file_globs_schema_inference.sql flakiness [#41877](https://github.com/ClickHouse/ClickHouse/pull/41877) ([Kruglov Pavel](https://github.com/Avogar)). +* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix pagination issue in GITHUB_JOB_ID() [#43681](https://github.com/ClickHouse/ClickHouse/pull/43681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/docs/changelogs/v22.9.5.25-stable.md b/docs/changelogs/v22.9.5.25-stable.md new file mode 100644 index 00000000000..e94f97ed662 --- /dev/null +++ b/docs/changelogs/v22.9.5.25-stable.md @@ -0,0 +1,30 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.9.5.25-stable (68ba857aa82) FIXME as compared to v22.9.4.32-stable (3db8bcf1a70) + +#### Improvement +* Backported in [#42841](https://github.com/ClickHouse/ClickHouse/issues/42841): Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#42965](https://github.com/ClickHouse/ClickHouse/issues/42965): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#43041](https://github.com/ClickHouse/ClickHouse/issues/43041): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#42749](https://github.com/ClickHouse/ClickHouse/issues/42749): A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#42863](https://github.com/ClickHouse/ClickHouse/issues/42863): Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). 
+* Backported in [#43063](https://github.com/ClickHouse/ClickHouse/issues/43063): Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). +* Backported in [#43443](https://github.com/ClickHouse/ClickHouse/issues/43443): - Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#43429](https://github.com/ClickHouse/ClickHouse/issues/43429): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.9.6.20-stable.md b/docs/changelogs/v22.9.6.20-stable.md new file mode 100644 index 00000000000..a7127643fd3 --- /dev/null +++ b/docs/changelogs/v22.9.6.20-stable.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.9.6.20-stable (ef6343f9579) FIXME as compared to v22.9.5.25-stable (68ba857aa82) + +#### Backward Incompatible Change +* Backported in [#43486](https://github.com/ClickHouse/ClickHouse/issues/43486): Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. 
For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then incompatible versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)). + +#### Build/Testing/Packaging Improvement +* Backported in [#43052](https://github.com/ClickHouse/ClickHouse/issues/43052): Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#43505](https://github.com/ClickHouse/ClickHouse/issues/43505): Fix a bug when row level filter uses default value of column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#43722](https://github.com/ClickHouse/ClickHouse/issues/43722): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix 02267_file_globs_schema_inference.sql flakiness [#41877](https://github.com/ClickHouse/ClickHouse/pull/41877) ([Kruglov Pavel](https://github.com/Avogar)). +* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix pagination issue in GITHUB_JOB_ID() [#43681](https://github.com/ClickHouse/ClickHouse/pull/43681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index c9fb78205d7..b19d09c777a 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -16,7 +16,7 @@ import SupersetDocker from '@site/docs/en/_snippets/_add_superset_detail.md'; ## Goal In this guide you will learn how to: -- Load the OpenCelliD data in Clickhouse +- Load the OpenCelliD data in ClickHouse - Connect Apache Superset to ClickHouse - Build a dashboard based on data available in the dataset @@ -275,7 +275,7 @@ Here is a description of the columns taken from the OpenCelliD forum: To find your MCC check [Mobile network codes](https://en.wikipedia.org/wiki/Mobile_country_code), and use the three digits in the **Mobile country code** column. ::: -The schema for this table was designed for compact storage on disk and query speed. +The schema for this table was designed for compact storage on disk and query speed. - The `radio` data is stored as an `Enum8` (`UInt8`) rather than a string. - `mcc` or Mobile country code, is stored as a `UInt16` as we know the range is 1 - 999. - `lon` and `lat` are `Float64`. 
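The compact-types choice above can be sketched concretely. The following `CREATE TABLE` is illustrative only: the column list is trimmed to the fields mentioned above, and the `Enum8` values and ordering key are assumptions for this example rather than the exact schema from the guide.

```sql
-- Illustrative sketch of the compact column types described above (not the guide's full schema).
CREATE TABLE cell_towers_sketch
(
    radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5), -- one byte per value instead of a string
    mcc UInt16,    -- Mobile country code, known to fit into 1 - 999
    lon Float64,
    lat Float64
)
ENGINE = MergeTree
ORDER BY (radio, mcc);
```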
diff --git a/docs/en/getting-started/example-datasets/github.md b/docs/en/getting-started/example-datasets/github.md index 9a4fbb7da06..239637a34e9 100644 --- a/docs/en/getting-started/example-datasets/github.md +++ b/docs/en/getting-started/example-datasets/github.md @@ -56,6 +56,7 @@ As of November 8th, 2022, each TSV is approximately the following size and numbe - [Line by line commit history of a file](#line-by-line-commit-history-of-a-file) - [Unsolved Questions](#unsolved-questions) - [Git blame](#git-blame) +- [Related Content](#related-content) # Generating the data @@ -2497,3 +2498,7 @@ LIMIT 20 We welcome exact and improved solutions here. +# Related Content + +- [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits) +- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) diff --git a/docs/en/getting-started/index.md b/docs/en/getting-started/index.md index 0bb3ae1ca71..e72e23208ac 100644 --- a/docs/en/getting-started/index.md +++ b/docs/en/getting-started/index.md @@ -22,5 +22,8 @@ functions in ClickHouse. The sample datasets include: - The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse - The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables - The ["What's on the Menu?" dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data +- The [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) provides examples of defining a schema and loading a small Hacker News dataset +- The [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) shows how JSON data can be loaded +- The [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) has examples of loading data from s3 -View the **Tutorials and Datasets** menu for a complete list of sample datasets. \ No newline at end of file +View the **Tutorials and Datasets** menu for a complete list of sample datasets. diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index e88e9e06a68..1ed93f7a1cb 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -8,7 +8,7 @@ slug: /en/install You have two options for getting up and running with ClickHouse: -- **[ClickHouse Cloud](https://clickhouse.cloud/):** the official ClickHouse as a service, - built by, maintained, and supported by the creators of ClickHouse +- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** the official ClickHouse as a service, - built by, maintained, and supported by the creators of ClickHouse - **Self-managed ClickHouse:** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture ## ClickHouse Cloud @@ -406,4 +406,3 @@ SELECT 1 **Congratulations, the system works!** To continue experimenting, you can download one of the test data sets or go through [tutorial](/docs/en/tutorial.md). - diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 3221b1a06fa..731348abfe7 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1456,6 +1456,10 @@ If setting [input_format_with_types_use_header](../operations/settings/settings. 
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: +## RowBinary format settings {#row-binary-format-settings} + +- [format_binary_max_string_size](../operations/settings/settings.md#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. + ## Values {#data-format-values} Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in a decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren’t inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../sql-reference/syntax.md) is represented as `NULL`. diff --git a/docs/en/operations/_backup.md b/docs/en/operations/backup.md similarity index 98% rename from docs/en/operations/_backup.md rename to docs/en/operations/backup.md index d694c51cee6..061d95c1152 100644 --- a/docs/en/operations/_backup.md +++ b/docs/en/operations/backup.md @@ -1,5 +1,8 @@ +--- +slug: /en/operations/backup +--- -[//]: # (This file is included in Manage > Backups) +# Backup and Restore - [Backup to a local disk](#backup-to-a-local-disk) - [Configuring backup/restore to use an S3 endpoint](#configuring-backuprestore-to-use-an-s3-endpoint) @@ -55,7 +58,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - SETTINGS: - [`compression_method`](en/sql-reference/statements/create/table/#column-compression-codecs) and compression_level - `password` for the file on disk - - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` + - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` ### Usage examples @@ -72,7 +75,7 @@ RESTORE TABLE test.table FROM Disk('backups', '1.zip') :::note The above RESTORE would fail if the table `test.table` contains data, you would have to drop the table in order to test the RESTORE, or use the setting `allow_non_empty_tables=true`: ``` -RESTORE TABLE test.table FROM Disk('backups', '1.zip') +RESTORE TABLE test.table FROM Disk('backups', '1.zip') SETTINGS allow_non_empty_tables=true ``` ::: @@ -101,7 +104,7 @@ BACKUP TABLE test.table TO Disk('backups', 'incremental-a.zip') Restore all data from the incremental backup and the base_backup into a new table `test.table2`: ``` -RESTORE TABLE test.table AS test.table2 +RESTORE TABLE test.table AS test.table2 FROM Disk('backups', 'incremental-a.zip'); ``` @@ -356,4 +359,3 @@ Data can be restored from backup using the `ALTER TABLE ... ATTACH PARTITION ... For more information about queries related to partition manipulations, see the [ALTER documentation](../sql-reference/statements/alter/partition.md#alter_manipulations-with-partitions). A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup). 
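To make the `base_backup` setting mentioned above concrete, here is a minimal sketch of taking an incremental backup on top of an earlier full backup, reusing the disk and archive names from the examples on this page; treat it as an illustration rather than a full walkthrough.

```sql
-- Full backup first, then an incremental backup that stores only the difference.
BACKUP TABLE test.table TO Disk('backups', '1.zip');

BACKUP TABLE test.table TO Disk('backups', 'incremental-a.zip')
    SETTINGS base_backup = Disk('backups', '1.zip');

-- Restoring from the incremental backup also reads what it needs from the base backup.
RESTORE TABLE test.table AS test.table2 FROM Disk('backups', 'incremental-a.zip');
```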
- diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 3aeae7d1c9d..86760ec245f 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -11,6 +11,7 @@ Main cache types: - `mark_cache` — Cache of marks used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family. - `uncompressed_cache` — Cache of uncompressed data used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family. +- Operating system page cache (used indirectly, for files with actual data). Additional cache types: @@ -22,10 +23,4 @@ Additional cache types: - Schema inference cache. - [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks. -Indirectly used: - -- OS page cache. - -To drop cache, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md) statements. - -[Original article](https://clickhouse.com/docs/en/operations/caches/) +To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 7494f3db71a..2fc6e64b7eb 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -668,7 +668,7 @@ log_query_views=1 ## log_formatted_queries {#settings-log-formatted-queries} -Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)). +Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)). Possible values: @@ -1807,6 +1807,41 @@ See also: - System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) +## memory_profiler_step {#memory_profiler_step} + +Sets the step of memory profiler. Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stacktrace and will write it into [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log). + +Possible values: + +- A positive integer number of bytes. + +- 0 for turning off the memory profiler. + +Default value: 4,194,304 bytes (4 MiB). + +## memory_profiler_sample_probability {#memory_profiler_sample_probability} + +Sets the probability of collecting stacktraces at random allocations and deallocations and writing them into [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log). + +Possible values: + +- A positive floating-point number in the range [0..1]. + +- 0.0 for turning off the memory sampling. + +Default value: 0.0. + +## trace_profile_events {#trace_profile_events} + +Enables or disables collecting stacktraces on each update of profile events along with the name of profile event and the value of increment and sending them into [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log). + +Possible values: + +- 1 — Tracing of profile events enabled. +- 0 — Tracing of profile events disabled. + +Default value: 0. + ## allow_introspection_functions {#settings-allow_introspection_functions} Enables or disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling. 
@@ -4829,3 +4864,11 @@ Disabled by default. Allow skipping columns with unsupported types while schema inference for format BSONEachRow. Disabled by default. + +## RowBinary format settings {#row-binary-format-settings} + +### format_binary_max_string_size {#format_binary_max_string_size} + +The maximum allowed size for String in RowBinary format. It prevents allocating a large amount of memory in case of corrupted data. 0 means there is no limit. + +Default value: `1GiB` diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index 0effe085b80..6299aafcae2 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -5,7 +5,8 @@ slug: /en/operations/system-tables/trace_log Contains stack traces collected by the sampling query profiler. -ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set. +ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also see settings: [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns), [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns), [memory_profiler_step](../../operations/settings/settings.md#memory_profiler_step), +[memory_profiler_sample_probability](../../operations/settings/settings.md#memory_profiler_sample_probability), [trace_profile_events](../../operations/settings/settings.md#trace_profile_events). To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` introspection functions. @@ -29,6 +30,8 @@ Columns: - `CPU` represents collecting stack traces by CPU time. - `Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark. - `MemorySample` represents collecting random allocations and deallocations. + - `MemoryPeak` represents collecting updates of peak memory usage. + - `ProfileEvent` represents collecting increments of profile events. + - `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier. @@ -36,6 +39,12 @@ Columns: - `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. +- `size` ([Int64](../../sql-reference/data-types/int-uint.md)) - For trace types `Memory`, `MemorySample` or `MemoryPeak` it is the amount of memory allocated; for other trace types it is 0. + +- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` it is the name of the updated profile event; for other trace types it is an empty string. + +- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` it is the increment of the profile event; for other trace types it is 0.
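As a rough illustration of the profiler-related settings and the new `trace_log` columns documented in this change, a session could enable sampling and profile-event tracing and then look at the collected rows. The setting values and the filter below are assumptions for the example, not recommended defaults.

```sql
-- Enable random allocation sampling and profile-event tracing for the current session (example values).
SET memory_profiler_sample_probability = 0.01;
SET trace_profile_events = 1;

-- Run any query that does some work.
SELECT sum(number) FROM numbers(100000000);

-- Inspect the collected traces: `event` and `increment` are filled for ProfileEvent rows,
-- `size` for the memory-related trace types. The table is flushed periodically, so rows may appear with a small delay.
SELECT trace_type, event, increment, size
FROM system.trace_log
WHERE event_date = today()
ORDER BY event_time DESC
LIMIT 10;
```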
+ **Example** ``` sql diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 0201462c0b6..da34a6b7e9c 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -286,3 +286,7 @@ end script If you use antivirus software configure it to skip folders with ClickHouse datafiles (`/var/lib/clickhouse`) otherwise performance may be reduced and you may experience unexpected errors during data ingestion and background merges. [Original article](https://clickhouse.com/docs/en/operations/tips/) + +## Related Content + +- [Getting started with ClickHouse? Here are 13 "Deadly Sins" and how to avoid them](https://clickhouse.com/blog/common-getting-started-issues-with-clickhouse) diff --git a/docs/en/operations/_update.md b/docs/en/operations/update.md similarity index 94% rename from docs/en/operations/_update.md rename to docs/en/operations/update.md index 86981da2be6..6a880bb78b6 100644 --- a/docs/en/operations/_update.md +++ b/docs/en/operations/update.md @@ -1,5 +1,8 @@ +--- +slug: /en/operations/update +--- -[//]: # (This file is included in Manage > Updates) +# Update ## Self-managed ClickHouse Upgrade diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index cb1b8b9a8e6..b98c7ed9dda 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -117,3 +117,8 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. ``` [Original article](https://clickhouse.com/docs/en/operations/utils/clickhouse-local/) + +## Related Content + +- [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) +- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data) diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index 34f875e2138..1e0b1d88c6e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -32,8 +32,8 @@ The null hypothesis is that means of populations are equal. Normal distribution - calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). - calculated p-value. [Float64](../../../sql-reference/data-types/float.md). -- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md). -- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). 
**Example** diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index 48dce40986e..3b2787008d2 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -95,3 +95,6 @@ Result: └─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘ ``` +## Related Content + +- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data) diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index ab0f6115a41..ab1596b1760 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -75,3 +75,7 @@ SELECT * FROM json FORMAT JSONEachRow ```text {"o":{"a":1,"b":{"c":2,"d":[1,2,3]}}} ``` + +## Related Content + +- [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md b/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md index e6a0dac7afb..a409dab31f4 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md @@ -1,4 +1,4 @@ :::tip -If you are using a dictionary with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. -Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md). +If you are using a dictionary with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`. +Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/cloud/reference/cloud-compatibility.md). ::: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index 366d88e07c7..8ef19a181e7 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -134,3 +134,7 @@ Result: │ [[[(3,1),(0,1),(0,-1),(3,-1)]]] │ Value │ └─────────────────────────────────┴───────┘ ``` + +## Related Content + +- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 6156a823d58..6cecc3f01da 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -410,35 +410,35 @@ Converts a date with time to a certain fixed date, while preserving the time. ## toRelativeYearNum -Converts a date with time or date to the number of the year, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the year, starting from a certain fixed point in the past. 
## toRelativeQuarterNum -Converts a date with time or date to the number of the quarter, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the quarter, starting from a certain fixed point in the past. ## toRelativeMonthNum -Converts a date with time or date to the number of the month, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the month, starting from a certain fixed point in the past. ## toRelativeWeekNum -Converts a date with time or date to the number of the week, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the week, starting from a certain fixed point in the past. ## toRelativeDayNum -Converts a date with time or date to the number of the day, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the day, starting from a certain fixed point in the past. ## toRelativeHourNum -Converts a date with time or date to the number of the hour, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the hour, starting from a certain fixed point in the past. ## toRelativeMinuteNum -Converts a date with time or date to the number of the minute, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the minute, starting from a certain fixed point in the past. ## toRelativeSecondNum -Converts a date with time or date to the number of the second, starting from a certain fixed point in the past. +Converts a date or date with time to the number of the second, starting from a certain fixed point in the past. ## toISOYear @@ -517,6 +517,154 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d └────────────┴───────────┴───────────┴───────────┘ ``` +## age + +Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. +E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. + + +**Syntax** + +``` sql +age('unit', startdate, enddate, [timezone]) +``` + +**Arguments** + +- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). + Possible values: + + - `second` (possible abbreviations: `ss`, `s`) + - `minute` (possible abbreviations: `mi`, `n`) + - `hour` (possible abbreviations: `hh`, `h`) + - `day` (possible abbreviations: `dd`, `d`) + - `week` (possible abbreviations: `wk`, `ww`) + - `month` (possible abbreviations: `mm`, `m`) + - `quarter` (possible abbreviations: `qq`, `q`) + - `year` (possible abbreviations: `yyyy`, `yy`) + +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). 
If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +Difference between `enddate` and `startdate` expressed in `unit`. + +Type: [Int](../../sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Result: + +``` text +┌─age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 24 │ +└───────────────────────────────────────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT + toDate('2022-01-01') AS e, + toDate('2021-12-29') AS s, + age('day', s, e) AS day_age, + age('month', s, e) AS month__age, + age('year', s, e) AS year_age; +``` + +Result: + +``` text +┌──────────e─┬──────────s─┬─day_age─┬─month__age─┬─year_age─┐ +│ 2022-01-01 │ 2021-12-29 │ 3 │ 0 │ 0 │ +└────────────┴────────────┴─────────┴────────────┴──────────┘ +``` + + +## date\_diff + +Returns the count of the specified `unit` boundaries crossed between the `startdate` and `enddate`. +The difference is calculated using relative units, e.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)). + +**Syntax** + +``` sql +date_diff('unit', startdate, enddate, [timezone]) +``` + +Aliases: `dateDiff`, `DATE_DIFF`. + +**Arguments** + +- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). + Possible values: + + - `second` (possible abbreviations: `ss`, `s`) + - `minute` (possible abbreviations: `mi`, `n`) + - `hour` (possible abbreviations: `hh`, `h`) + - `day` (possible abbreviations: `dd`, `d`) + - `week` (possible abbreviations: `wk`, `ww`) + - `month` (possible abbreviations: `mm`, `m`) + - `quarter` (possible abbreviations: `qq`, `q`) + - `year` (possible abbreviations: `yyyy`, `yy`) + +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +Difference between `enddate` and `startdate` expressed in `unit`. + +Type: [Int](../../sql-reference/data-types/int-uint.md). 
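
The practical difference between `age` and `date_diff` is easiest to see on the same pair of dates; the following sketch simply combines the examples given in this section:

``` sql
-- age() measures complete elapsed units; dateDiff() counts unit boundaries crossed.
SELECT
    age('month', toDate('2021-12-29'), toDate('2022-01-01'))      AS age_months,  -- 0: less than one full month has elapsed
    dateDiff('month', toDate('2021-12-29'), toDate('2022-01-01')) AS diff_months; -- 1: the December/January boundary was crossed
```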
+ +**Example** + +Query: + +``` sql +SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Result: + +``` text +┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 25 │ +└────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT + toDate('2022-01-01') AS e, + toDate('2021-12-29') AS s, + dateDiff('day', s, e) AS day_diff, + dateDiff('month', s, e) AS month__diff, + dateDiff('year', s, e) AS year_diff; +``` + +Result: + +``` text +┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐ +│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │ +└────────────┴────────────┴──────────┴─────────────┴───────────┘ +``` + ## date\_trunc Truncates date and time data to the specified part of date. @@ -637,80 +785,6 @@ Result: └───────────────────────────────────────────────┘ ``` -## date\_diff - -Returns the difference between two dates or dates with time values. -The difference is calculated using relative units, e.g. the difference between `2022-01-01` and `2021-12-29` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)). - -**Syntax** - -``` sql -date_diff('unit', startdate, enddate, [timezone]) -``` - -Aliases: `dateDiff`, `DATE_DIFF`. - -**Arguments** - -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). - Possible values: - - - `second` - - `minute` - - `hour` - - `day` - - `week` - - `month` - - `quarter` - - `year` - -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). - -**Returned value** - -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). 
- -**Example** - -Query: - -``` sql -SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Result: - -``` text -┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 25 │ -└────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT - toDate('2022-01-01') AS e, - toDate('2021-12-29') AS s, - dateDiff('day', s, e) AS day_diff, - dateDiff('month', s, e) AS month__diff, - dateDiff('year', s, e) AS year_diff; -``` - -Result: - -``` text -┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐ -│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │ -└────────────┴────────────┴──────────┴─────────────┴───────────┘ -``` - ## date\_sub Subtracts the time interval or date interval from the provided date or date with time. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index b9ec21bb59d..536249626e5 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1865,6 +1865,17 @@ Next, specify the path to `libcatboostmodel.` in the clickhouse config ``` +For security and isolation reasons, the model evaluation does not run in the server process but in the clickhouse-library-bridge process. +At the first execution of `catboostEvaluate()`, the server starts the library bridge process if it is not running already. Both processes +communicate using a HTTP interface. By default, port `9012` is used. A different port can be specified as follows - this is useful if port +`9012` is already assigned to a different service. + +``` xml + + 9019 + +``` + 2. Train a catboost model using libcatboost See [Training and applying models](https://catboost.ai/docs/features/training.html#training) for how to train catboost models from a training data set. diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index b03ca88fc61..b515f6ad518 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -464,5 +464,39 @@ Removes the query string and fragment identifier. The question mark and number s ### cutURLParameter(URL, name) -Removes the ‘name’ URL parameter, if present. This function works under the assumption that the parameter name is encoded in the URL exactly the same way as in the passed argument. +Removes the `name` parameter from URL, if present. This function does not encode or decode characters in parameter names, e.g. `Client ID` and `Client%20ID` are treated as different parameter names. +**Syntax** + +``` sql +cutURLParameter(URL, name) +``` + +**Arguments** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `name` — name of URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings. + +**Returned value** + +- URL with `name` URL parameter removed. + +Type: `String`. 
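
To illustrate the encoding caveat stated above, a short sketch with a hypothetical URL (`example.com` and the parameter values are placeholders):

``` sql
-- Parameter names are matched literally: an encoded name in the URL is not
-- matched by its decoded form, and vice versa.
SELECT
    cutURLParameter('http://example.com/?Client%20ID=1&x=2', 'Client ID')   AS decoded_name,  -- URL returned unchanged
    cutURLParameter('http://example.com/?Client%20ID=1&x=2', 'Client%20ID') AS encoded_name;  -- 'http://example.com/?x=2'
```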
+ +**Example** + +Query: + +``` sql +SELECT + cutURLParameter('http://bigmir.net/?a=b&c=d&e=f#g', 'a') as url_without_a, + cutURLParameter('http://bigmir.net/?a=b&c=d&e=f#g', ['c', 'e']) as url_without_c_and_e; +``` + +Result: + +``` text +┌─url_without_a────────────────┬─url_without_c_and_e──────┐ +│ http://bigmir.net/?c=d&e=f#g │ http://bigmir.net/?a=b#g │ +└──────────────────────────────┴──────────────────────────┘ +``` diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index a8cea63380c..aad52efb39d 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -162,7 +162,7 @@ ALTER TABLE table_name [ON CLUSTER cluster] FREEZE [PARTITION partition_expr] [W This query creates a local backup of a specified partition. If the `PARTITION` clause is omitted, the query creates the backup of all partitions at once. -:::note +:::note The entire backup process is performed without stopping the server. ::: @@ -172,9 +172,9 @@ At the time of execution, for a data snapshot, the query creates hardlinks to a - `/var/lib/clickhouse/` is the working ClickHouse directory specified in the config. - `N` is the incremental number of the backup. -- if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number. +- if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number. -:::note +:::note If you use [a set of disks for data storage in a table](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing data parts that matched by the `PARTITION` expression. ::: @@ -194,7 +194,7 @@ To restore data from a backup, do the following: Restoring from a backup does not require stopping the server. -For more information about backups and restoring data, see the [Data Backup](/docs/en/manage/backups.mdx) section. +For more information about backups and restoring data, see the [Data Backup](/docs/en/operations/backup.md) section. ## UNFREEZE PARTITION diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 4da5f4cc420..a545fb630c9 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -587,3 +587,8 @@ ORDER BY │ ambient_temp │ 2020-03-01 12:00:00 │ 16 │ 16 │ └──────────────┴─────────────────────┴───────┴─────────────────────────┘ ``` + +## Related Content + +- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) +- [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index f430f5cae51..8fbcaf9568b 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -424,23 +424,23 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d ## toRelativeYearNum {#torelativeyearnum} -Переводит дату-с-временем или дату в номер года, начиная с некоторого фиксированного момента в прошлом. 
+Переводит дату или дату-с-временем в номер года, начиная с некоторого фиксированного момента в прошлом. ## toRelativeQuarterNum {#torelativequarternum} -Переводит дату-с-временем или дату в номер квартала, начиная с некоторого фиксированного момента в прошлом. +Переводит дату или дату-с-временем в номер квартала, начиная с некоторого фиксированного момента в прошлом. ## toRelativeMonthNum {#torelativemonthnum} -Переводит дату-с-временем или дату в номер месяца, начиная с некоторого фиксированного момента в прошлом. +Переводит дату или дату-с-временем в номер месяца, начиная с некоторого фиксированного момента в прошлом. ## toRelativeWeekNum {#torelativeweeknum} -Переводит дату-с-временем или дату в номер недели, начиная с некоторого фиксированного момента в прошлом. +Переводит дату или дату-с-временем в номер недели, начиная с некоторого фиксированного момента в прошлом. ## toRelativeDayNum {#torelativedaynum} -Переводит дату-с-временем или дату в номер дня, начиная с некоторого фиксированного момента в прошлом. +Переводит дату или дату-с-временем в номер дня, начиная с некоторого фиксированного момента в прошлом. ## toRelativeHourNum {#torelativehournum} @@ -456,7 +456,7 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d ## toISOYear {#toisoyear} -Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. +Переводит дату или дату-с-временем в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. **Пример** @@ -479,7 +479,7 @@ SELECT ## toISOWeek {#toisoweek} -Переводит дату-с-временем или дату в число типа UInt8, содержащее номер ISO недели. +Переводит дату или дату-с-временем в число типа UInt8, содержащее номер ISO недели. Начало ISO года отличается от начала обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) первая неделя года - это неделя с четырьмя или более днями в этом году. 1 Января 2017 г. - воскресение, т.е. первая ISO неделя 2017 года началась в понедельник 2 января, поэтому 1 января 2017 это последняя неделя 2016 года. @@ -503,7 +503,7 @@ SELECT ``` ## toWeek(date\[, mode\]\[, timezone\]) {#toweek} -Переводит дату-с-временем или дату в число UInt8, содержащее номер недели. Второй аргументам mode задает режим, начинается ли неделя с воскресенья или с понедельника и должно ли возвращаемое значение находиться в диапазоне от 0 до 53 или от 1 до 53. Если аргумент mode опущен, то используется режим 0. +Переводит дату или дату-с-временем в число UInt8, содержащее номер недели. Второй аргументам mode задает режим, начинается ли неделя с воскресенья или с понедельника и должно ли возвращаемое значение находиться в диапазоне от 0 до 53 или от 1 до 53. Если аргумент mode опущен, то используется режим 0. `toISOWeek() ` эквивалентно `toWeek(date,3)`. @@ -569,6 +569,132 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d └────────────┴───────────┴───────────┴───────────┘ ``` +## age + +Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду. +Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. 
+ +**Синтаксис** + +``` sql +age('unit', startdate, enddate, [timezone]) +``` + +**Аргументы** + +- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). + Возможные значения: + + - `second` (возможные сокращения: `ss`, `s`) + - `minute` (возможные сокращения: `mi`, `n`) + - `hour` (возможные сокращения: `hh`, `h`) + - `day` (возможные сокращения: `dd`, `d`) + - `week` (возможные сокращения: `wk`, `ww`) + - `month` (возможные сокращения: `mm`, `m`) + - `quarter` (возможные сокращения: `qq`, `q`) + - `year` (возможные сокращения: `yyyy`, `yy`) + +- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +Разница между `enddate` и `startdate`, выраженная в `unit`. + +Тип: [Int](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Результат: + +``` text +┌─age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 24 │ +└───────────────────────────────────────────────────────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT + toDate('2022-01-01') AS e, + toDate('2021-12-29') AS s, + age('day', s, e) AS day_age, + age('month', s, e) AS month__age, + age('year', s, e) AS year_age; +``` + +Результат: + +``` text +┌──────────e─┬──────────s─┬─day_age─┬─month__age─┬─year_age─┐ +│ 2022-01-01 │ 2021-12-29 │ 3 │ 0 │ 0 │ +└────────────┴────────────┴─────────┴────────────┴──────────┘ +``` + +## date\_diff {#date_diff} + +Вычисляет разницу указанных границ `unit` пересекаемых между `startdate` и `enddate`. + +**Синтаксис** + +``` sql +date_diff('unit', startdate, enddate, [timezone]) +``` + +Синонимы: `dateDiff`, `DATE_DIFF`. + +**Аргументы** + +- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). + Возможные значения: + + - `second` (возможные сокращения: `ss`, `s`) + - `minute` (возможные сокращения: `mi`, `n`) + - `hour` (возможные сокращения: `hh`, `h`) + - `day` (возможные сокращения: `dd`, `d`) + - `week` (возможные сокращения: `wk`, `ww`) + - `month` (возможные сокращения: `mm`, `m`) + - `quarter` (возможные сокращения: `qq`, `q`) + - `year` (возможные сокращения: `yyyy`, `yy`) + +- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. 
[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +Разница между `enddate` и `startdate`, выраженная в `unit`. + +Тип: [Int](../../sql-reference/data-types/int-uint.md). + +**Пример** + +Запрос: + +``` sql +SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Результат: + +``` text +┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 25 │ +└────────────────────────────────────────────────────────────────────────────────────────┘ +``` + ## date_trunc {#date_trunc} Отсекает от даты и времени части, меньшие чем указанная часть. @@ -689,60 +815,6 @@ SELECT date_add(YEAR, 3, toDate('2018-01-01')); └───────────────────────────────────────────────┘ ``` -## date\_diff {#date_diff} - -Вычисляет разницу между двумя значениями дат или дат со временем. - -**Синтаксис** - -``` sql -date_diff('unit', startdate, enddate, [timezone]) -``` - -Синонимы: `dateDiff`, `DATE_DIFF`. - -**Аргументы** - -- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). - Возможные значения: - - - `second` - - `minute` - - `hour` - - `day` - - `week` - - `month` - - `quarter` - - `year` - -- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). - -**Возвращаемое значение** - -Разница между `enddate` и `startdate`, выраженная в `unit`. - -Тип: [Int](../../sql-reference/data-types/int-uint.md). 
- -**Пример** - -Запрос: - -``` sql -SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Результат: - -``` text -┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 25 │ -└────────────────────────────────────────────────────────────────────────────────────────┘ -``` - ## date\_sub {#date_sub} Вычитает интервал времени или даты из указанной даты или даты со временем. diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 34bb88f4991..3c6e6151ef8 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -404,5 +404,39 @@ SELECT netloc('http://paul@www.example.com:80/'); ### cutURLParameter(URL, name) {#cuturlparameterurl-name} -Удаляет параметр URL с именем name, если такой есть. Функция работает при допущении, что имя параметра закодировано в URL в точности таким же образом, что и в переданном аргументе. +Удаляет параметр с именем `name` из URL, если такой есть. Функция не кодирует или декодирует символы в именах параметров. Например `Client ID` и `Client%20ID` обрабатываются как разные имена параметров. +**Синтаксис** + +``` sql +cutURLParameter(URL, name) +``` + +**Аргументы** + +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `name` — имя параметра URL. [String](../../sql-reference/data-types/string.md) или [Array](../../sql-reference/data-types/array.md) состоящий из строк. + +**Возвращаемое значение** + +- URL с удалённым параметром URL с именем `name`. + +Type: `String`. + +**Пример** + +Запрос: + +``` sql +SELECT + cutURLParameter('http://bigmir.net/?a=b&c=d&e=f#g', 'a') as url_without_a, + cutURLParameter('http://bigmir.net/?a=b&c=d&e=f#g', ['c', 'e']) as url_without_c_and_e; +``` + +Результат: + +``` text +┌─url_without_a────────────────┬─url_without_c_and_e──────┐ +│ http://bigmir.net/?c=d&e=f#g │ http://bigmir.net/?a=b#g │ +└──────────────────────────────┴──────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/table-functions/format.md b/docs/ru/sql-reference/table-functions/format.md deleted file mode 120000 index cc5e3a5a142..00000000000 --- a/docs/ru/sql-reference/table-functions/format.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/table-functions/format.md \ No newline at end of file diff --git a/docs/ru/sql-reference/table-functions/format.md b/docs/ru/sql-reference/table-functions/format.md new file mode 100644 index 00000000000..7b1516bc173 --- /dev/null +++ b/docs/ru/sql-reference/table-functions/format.md @@ -0,0 +1,75 @@ +--- +slug: /ru/sql-reference/table-functions/format +sidebar_position: 56 +sidebar_label: format +--- + +# format + +Extracts table structure from data and parses it according to specified input format. + +**Syntax** + +``` sql +format(format_name, data) +``` + +**Parameters** + +- `format_name` — The [format](../../interfaces/formats.md#formats) of the data. +- `data` — String literal or constant expression that returns a string containing data in specified format + +**Returned value** + +A table with data parsed from `data` argument according specified format and extracted schema. 
+ +**Examples** + +**Query:** +``` sql +:) select * from format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 112} +{"a": "World", "b": 124} +$$) +``` + +**Result:** + +```text +┌───b─┬─a─────┐ +│ 111 │ Hello │ +│ 123 │ World │ +│ 112 │ Hello │ +│ 124 │ World │ +└─────┴───────┘ +``` + +**Query:** +```sql + +:) desc format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 112} +{"a": "World", "b": 124} +$$) +``` + +**Result:** + +```text +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ b │ Nullable(Float64) │ │ │ │ │ │ +│ a │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +**See Also** + +- [Formats](../../interfaces/formats.md) + +[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/format) diff --git a/docs/tools/release.sh b/docs/tools/release.sh index 1d344457bf1..67499631baa 100755 --- a/docs/tools/release.sh +++ b/docs/tools/release.sh @@ -19,7 +19,7 @@ then # Will make a repository with website content as the only commit. git init git remote add origin "${GIT_PROD_URI}" - git config user.email "robot-clickhouse@clickhouse.com" + git config user.email "robot-clickhouse@users.noreply.github.com" git config user.name "robot-clickhouse" # Add files. diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md deleted file mode 120000 index cc5e3a5a142..00000000000 --- a/docs/zh/sql-reference/table-functions/format.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/table-functions/format.md \ No newline at end of file diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md new file mode 100644 index 00000000000..d111c175272 --- /dev/null +++ b/docs/zh/sql-reference/table-functions/format.md @@ -0,0 +1,75 @@ +--- +slug: /zh/sql-reference/table-functions/format +sidebar_position: 56 +sidebar_label: format +--- + +# format + +Extracts table structure from data and parses it according to specified input format. + +**Syntax** + +``` sql +format(format_name, data) +``` + +**Parameters** + +- `format_name` — The [format](../../interfaces/formats.md#formats) of the data. +- `data` — String literal or constant expression that returns a string containing data in specified format + +**Returned value** + +A table with data parsed from `data` argument according specified format and extracted schema. 
+ +**Examples** + +**Query:** +``` sql +:) select * from format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 112} +{"a": "World", "b": 124} +$$) +``` + +**Result:** + +```text +┌───b─┬─a─────┐ +│ 111 │ Hello │ +│ 123 │ World │ +│ 112 │ Hello │ +│ 124 │ World │ +└─────┴───────┘ +``` + +**Query:** +```sql + +:) desc format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 112} +{"a": "World", "b": 124} +$$) +``` + +**Result:** + +```text +┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ b │ Nullable(Float64) │ │ │ │ │ │ +│ a │ Nullable(String) │ │ │ │ │ │ +└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +**See Also** + +- [Formats](../../interfaces/formats.md) + +[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/format) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 9266a4ca419..9b01e6920a4 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -13,7 +13,6 @@ clickhouse_embed_binaries( set(CLICKHOUSE_KEEPER_SOURCES Keeper.cpp - TinyContext.cpp ) set (CLICKHOUSE_KEEPER_LINK @@ -49,6 +48,8 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/TinyContext.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/pathUtils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp @@ -64,7 +65,18 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnection.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerRequest.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerResponse.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServerConnectionFactory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CachedCompressedReadBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CheckingCompressedReadBuffer.cpp @@ -96,9 +108,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp Keeper.cpp - TinyContext.cpp clickhouse-keeper.cpp - ) clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 8f65141b533..25452b808e2 100644 --- 
a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -22,8 +21,15 @@ #include #include #include -#include +#include +#include + +#include +#include +#include + +#include "Core/Defines.h" #include "config.h" #include "config_version.h" @@ -52,6 +58,16 @@ int mainEntryClickHouseKeeper(int argc, char ** argv) } } +#ifdef KEEPER_STANDALONE_BUILD + +// Weak symbols don't work correctly on Darwin +// so we have a stub implementation to avoid linker errors +void collectCrashLog( + Int32, UInt64, const String &, const StackTrace &) +{} + +#endif + namespace DB { @@ -261,6 +277,60 @@ void Keeper::defineOptions(Poco::Util::OptionSet & options) BaseDaemon::defineOptions(options); } +struct Keeper::KeeperHTTPContext : public IHTTPContext +{ + explicit KeeperHTTPContext(TinyContextPtr context_) + : context(std::move(context_)) + {} + + uint64_t getMaxHstsAge() const override + { + return context->getConfigRef().getUInt64("keeper_server.hsts_max_age", 0); + } + + uint64_t getMaxUriSize() const override + { + return context->getConfigRef().getUInt64("keeper_server.http_max_uri_size", 1048576); + } + + uint64_t getMaxFields() const override + { + return context->getConfigRef().getUInt64("keeper_server.http_max_fields", 1000000); + } + + uint64_t getMaxFieldNameSize() const override + { + return context->getConfigRef().getUInt64("keeper_server.http_max_field_name_size", 1048576); + } + + uint64_t getMaxFieldValueSize() const override + { + return context->getConfigRef().getUInt64("keeper_server.http_max_field_value_size", 1048576); + } + + uint64_t getMaxChunkSize() const override + { + return context->getConfigRef().getUInt64("keeper_server.http_max_chunk_size", 100_GiB); + } + + Poco::Timespan getReceiveTimeout() const override + { + return context->getConfigRef().getUInt64("keeper_server.http_receive_timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT); + } + + Poco::Timespan getSendTimeout() const override + { + return context->getConfigRef().getUInt64("keeper_server.http_send_timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT); + } + + TinyContextPtr context; +}; + +HTTPContextPtr Keeper::httpContext() +{ + return std::make_shared(tiny_context); +} + int Keeper::main(const std::vector & /*args*/) try { @@ -335,6 +405,25 @@ try DNSResolver::instance().setDisableCacheFlag(); Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024)); + std::mutex servers_lock; + auto servers = std::make_shared>(); + + tiny_context = std::make_shared(); + /// This object will periodically calculate some metrics. + KeeperAsynchronousMetrics async_metrics( + tiny_context, + config().getUInt("asynchronous_metrics_update_period_s", 1), + [&]() -> std::vector + { + std::vector metrics; + + std::lock_guard lock(servers_lock); + metrics.reserve(servers->size()); + for (const auto & server : *servers) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + return metrics; + } + ); std::vector listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host"); @@ -346,15 +435,13 @@ try listen_try = true; } - auto servers = std::make_shared>(); - /// Initialize keeper RAFT. Do nothing if no keeper_server in config. 
- tiny_context.initializeKeeperDispatcher(/* start_async = */ true); - FourLetterCommandFactory::registerCommands(*tiny_context.getKeeperDispatcher()); + tiny_context->initializeKeeperDispatcher(/* start_async = */ true); + FourLetterCommandFactory::registerCommands(*tiny_context->getKeeperDispatcher()); auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & { - return tiny_context.getConfigRef(); + return tiny_context->getConfigRef(); }; for (const auto & listen_host : listen_hosts) @@ -373,7 +460,7 @@ try "Keeper (tcp): " + address.toString(), std::make_unique( new KeeperTCPHandlerFactory( - config_getter, tiny_context.getKeeperDispatcher(), + config_getter, tiny_context->getKeeperDispatcher(), config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), false), server_pool, socket)); }); @@ -392,7 +479,7 @@ try "Keeper with secure protocol (tcp_secure): " + address.toString(), std::make_unique( new KeeperTCPHandlerFactory( - config_getter, tiny_context.getKeeperDispatcher(), + config_getter, tiny_context->getKeeperDispatcher(), config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), true), server_pool, socket)); #else @@ -401,6 +488,29 @@ try ErrorCodes::SUPPORT_IS_DISABLED}; #endif }); + + const auto & config = config_getter(); + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + /// Prometheus (if defined and not setup yet with http_port) + port_name = "prometheus.port"; + createServer(listen_host, port_name, listen_try, [&](UInt16 port) + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + auto http_context = httpContext(); + socket.setReceiveTimeout(http_context->getReceiveTimeout()); + socket.setSendTimeout(http_context->getSendTimeout()); + servers->emplace_back( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + std::move(http_context), createPrometheusMainHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + }); } for (auto & server : *servers) @@ -409,6 +519,8 @@ try LOG_INFO(log, "Listening for {}", server.getDescription()); } + async_metrics.start(); + zkutil::EventPtr unused_event = std::make_shared(); zkutil::ZooKeeperNodeCache unused_cache([] { return nullptr; }); /// ConfigReloader have to strict parameters which are redundant in our case @@ -421,7 +533,7 @@ try [&](ConfigurationPtr config, bool /* initial_loading */) { if (config->has("keeper_server")) - tiny_context.updateKeeperConfiguration(*config); + tiny_context->updateKeeperConfiguration(*config); }, /* already_loaded = */ false); /// Reload it right now (initial loading) @@ -429,6 +541,8 @@ try LOG_INFO(log, "Shutting down."); main_config_reloader.reset(); + async_metrics.stop(); + LOG_DEBUG(log, "Waiting for current connections to Keeper to finish."); size_t current_connections = 0; for (auto & server : *servers) @@ -450,7 +564,7 @@ try else LOG_INFO(log, "Closed connections to Keeper."); - tiny_context.shutdownKeeperDispatcher(); + 
tiny_context->shutdownKeeperDispatcher(); /// Wait server pool to avoid use-after-free of destroyed context in the handlers server_pool.joinAll(); diff --git a/programs/keeper/Keeper.h b/programs/keeper/Keeper.h index 75cd9b825d0..8a7724acb85 100644 --- a/programs/keeper/Keeper.h +++ b/programs/keeper/Keeper.h @@ -1,8 +1,9 @@ #pragma once #include +#include #include -#include "TinyContext.h" +#include namespace Poco { @@ -15,29 +16,40 @@ namespace Poco namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + /// standalone clickhouse-keeper server (replacement for ZooKeeper). Uses the same /// config as clickhouse-server. Serves requests on TCP ports with or without /// SSL using ZooKeeper protocol. -class Keeper : public BaseDaemon +class Keeper : public BaseDaemon, public IServer { public: using ServerApplication::run; - Poco::Util::LayeredConfiguration & config() const + Poco::Util::LayeredConfiguration & config() const override { return BaseDaemon::config(); } - Poco::Logger & logger() const + Poco::Logger & logger() const override { return BaseDaemon::logger(); } - bool isCancelled() const + bool isCancelled() const override { return BaseDaemon::isCancelled(); } + /// Returns global application's context. + ContextMutablePtr context() const override + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot fetch context for Keeper"); + } + void defineOptions(Poco::Util::OptionSet & _options) override; protected: @@ -56,7 +68,10 @@ protected: std::string getDefaultConfigFileName() const override; private: - TinyContext tiny_context; + TinyContextPtr tiny_context; + + struct KeeperHTTPContext; + HTTPContextPtr httpContext(); Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index ce7e27026f1..33d11091660 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include @@ -120,7 +120,7 @@ void LocalServer::initialize(Poco::Util::Application & self) config().getUInt("max_io_thread_pool_free_size", 0), config().getUInt("io_thread_pool_queue_size", 10000)); - NamedCollectionFactory::instance().initialize(config()); + NamedCollectionUtils::loadFromConfig(config()); } @@ -212,6 +212,8 @@ void LocalServer::tryInitPath() global_context->setUserFilesPath(""); // user's files are everywhere + NamedCollectionUtils::loadFromSQL(global_context); + /// top_level_domains_lists const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/"); if (!top_level_domains_path.empty()) diff --git a/programs/server/MetricsTransmitter.cpp b/programs/server/MetricsTransmitter.cpp index f7829a49a39..2f28f0a1d16 100644 --- a/programs/server/MetricsTransmitter.cpp +++ b/programs/server/MetricsTransmitter.cpp @@ -1,6 +1,6 @@ #include "MetricsTransmitter.h" -#include +#include #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b6ce358a5ef..965717d74b9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include #include @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include @@ -782,7 +782,7 @@ try config().getUInt("max_io_thread_pool_free_size", 0), 
config().getUInt("io_thread_pool_queue_size", 10000)); - NamedCollectionFactory::instance().initialize(config()); + NamedCollectionUtils::loadFromConfig(config()); /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) @@ -803,7 +803,7 @@ try std::vector servers; std::vector servers_to_start_before_tables; /// This object will periodically calculate some metrics. - AsynchronousMetrics async_metrics( + ServerAsynchronousMetrics async_metrics( global_context, config().getUInt("asynchronous_metrics_update_period_s", 1), config().getUInt("asynchronous_heavy_metrics_update_period_s", 120), @@ -1168,6 +1168,8 @@ try SensitiveDataMasker::setInstance(std::make_unique(config(), "query_masking_rules")); } + NamedCollectionUtils::loadFromSQL(global_context); + auto main_config_reloader = std::make_unique( config_path, include_from_path, @@ -1336,7 +1338,8 @@ try #if USE_SSL CertificateReloader::instance().tryLoad(*config); #endif - NamedCollectionFactory::instance().reload(*config); + NamedCollectionUtils::reloadFromConfig(*config); + ProfileEvents::increment(ProfileEvents::MainConfigLoads); /// Must be the last. @@ -1947,15 +1950,15 @@ std::unique_ptr Server::buildProtocolStackFromConfig( return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this)); if (type == "http") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory")) + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory")) ); if (type == "prometheus") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory")) + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory")) ); if (type == "interserver") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory")) + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory")) ); throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); @@ -1996,6 +1999,11 @@ std::unique_ptr Server::buildProtocolStackFromConfig( return stack; } +HTTPContextPtr Server::httpContext() const +{ + return std::make_shared(context()); +} + void Server::createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, @@ -2078,7 +2086,7 @@ void Server::createServers( port_name, "http://" + address.toString(), std::make_unique( - context(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); }); /// HTTPS @@ -2095,7 +2103,7 @@ void Server::createServers( port_name, "https://" + address.toString(), std::make_unique( - context(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); #else UNUSED(port); throw 
Exception{"HTTPS protocol is disabled because Poco library was built without NetSSL support.", @@ -2220,7 +2228,7 @@ void Server::createServers( port_name, "Prometheus: http://" + address.toString(), std::make_unique( - context(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); }); } @@ -2240,7 +2248,7 @@ void Server::createServers( port_name, "replica communication (interserver): http://" + address.toString(), std::make_unique( - context(), + httpContext(), createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), server_pool, socket, @@ -2260,7 +2268,7 @@ void Server::createServers( port_name, "secure replica communication (interserver): https://" + address.toString(), std::make_unique( - context(), + httpContext(), createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), server_pool, socket, diff --git a/programs/server/Server.h b/programs/server/Server.h index 53841b1fcd4..e9ae6d8d937 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -3,6 +3,7 @@ #include #include +#include "Server/HTTP/HTTPContext.h" #include #include @@ -72,6 +73,8 @@ private: /// Updated/recent config, to compare http_handlers ConfigurationPtr latest_config; + HTTPContextPtr httpContext() const; + Poco::Net::SocketAddress socketBindListen( const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, diff --git a/src/Access/Common/AccessFlags.h b/src/Access/Common/AccessFlags.h index 5124f4ef332..c4e0b7ac281 100644 --- a/src/Access/Common/AccessFlags.h +++ b/src/Access/Common/AccessFlags.h @@ -104,7 +104,7 @@ public: /// The same as allColumnFlags(). 
static AccessFlags allFlagsGrantableOnColumnLevel(); - static constexpr size_t SIZE = 128; + static constexpr size_t SIZE = 256; private: using Flags = std::bitset; Flags flags; diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index ed87b13f01a..366667410d5 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -69,6 +69,7 @@ enum class AccessType M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \ \ M(ALTER_DATABASE_SETTINGS, "ALTER DATABASE SETTING, ALTER MODIFY DATABASE SETTING, MODIFY DATABASE SETTING", DATABASE, ALTER_DATABASE) /* allows to execute ALTER MODIFY SETTING */\ + M(ALTER_NAMED_COLLECTION, "", GROUP, ALTER) /* allows to execute ALTER NAMED COLLECTION */\ \ M(ALTER_TABLE, "", GROUP, ALTER) \ M(ALTER_DATABASE, "", GROUP, ALTER) \ @@ -88,6 +89,7 @@ enum class AccessType M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables; implicitly enabled by the grant CREATE_TABLE on any table */ \ M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \ + M(CREATE_NAMED_COLLECTION, "", GLOBAL, CREATE) /* allows to execute CREATE NAMED COLLECTION */ \ M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \ \ M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\ @@ -96,6 +98,7 @@ enum class AccessType implicitly enabled by the grant DROP_TABLE */\ M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\ M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\ + M(DROP_NAMED_COLLECTION, "", GLOBAL, DROP) /* allows to execute DROP NAMED COLLECTION */\ M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\ \ M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \ diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h index aa3f78c8f0b..a1a2ce2669b 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h @@ -160,7 +160,7 @@ public: else { writeBinary(UInt8(0), buf); - serialization->serializeBinary(elem, buf); + serialization->serializeBinary(elem, buf, {}); } } } @@ -181,7 +181,7 @@ public: UInt8 is_null = 0; readBinary(is_null, buf); if (!is_null) - serialization->deserializeBinary(arr[i], buf); + serialization->deserializeBinary(arr[i], buf, {}); } } diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index d6a8e895a11..8117daa4760 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -795,7 +795,7 @@ public: if (!value.isNull()) { writeBinary(true, buf); - serialization.serializeBinary(value, buf); + serialization.serializeBinary(value, buf, {}); } else writeBinary(false, buf); @@ -807,7 +807,7 @@ public: readBinary(is_not_null, buf); if (is_not_null) - serialization.deserializeBinary(value, buf); + serialization.deserializeBinary(value, buf, {}); } void change(const IColumn & column, size_t row_num, Arena *) diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h index deed06b8bf2..64f48ac2987 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.h +++ b/src/AggregateFunctions/AggregateFunctionNull.h @@ -77,7 +77,10 @@ protected: static bool getFlag(ConstAggregateDataPtr __restrict place) noexcept 
{ - return result_is_nullable ? place[0] : true; + if constexpr (result_is_nullable) + return place[0]; + else + return true; } public: @@ -98,9 +101,10 @@ public: DataTypePtr getReturnType() const override { - return result_is_nullable - ? makeNullable(nested_function->getReturnType()) - : nested_function->getReturnType(); + if constexpr (result_is_nullable) + return makeNullable(nested_function->getReturnType()); + else + return nested_function->getReturnType(); } void create(AggregateDataPtr __restrict place) const override @@ -136,8 +140,9 @@ public: void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - if (result_is_nullable && getFlag(rhs)) - setFlag(place); + if constexpr (result_is_nullable) + if (getFlag(rhs)) + setFlag(place); nested_function->merge(nestedPlace(place), nestedPlace(rhs), arena); } diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index e9db1a71511..1e32be987ff 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -296,19 +296,19 @@ public: { case 0: { - serialize = [&](size_t col_idx, const Array & values){ values_serializations[col_idx]->serializeBinary(values[col_idx], buf); }; + serialize = [&](size_t col_idx, const Array & values){ values_serializations[col_idx]->serializeBinary(values[col_idx], buf, {}); }; break; } case 1: { - serialize = [&](size_t col_idx, const Array & values){ promoted_values_serializations[col_idx]->serializeBinary(values[col_idx], buf); }; + serialize = [&](size_t col_idx, const Array & values){ promoted_values_serializations[col_idx]->serializeBinary(values[col_idx], buf, {}); }; break; } } for (const auto & elem : merged_maps) { - keys_serialization->serializeBinary(elem.first, buf); + keys_serialization->serializeBinary(elem.first, buf, {}); for (size_t col = 0; col < values_types.size(); ++col) serialize(col, elem.second); } @@ -328,12 +328,12 @@ public: { case 0: { - deserialize = [&](size_t col_idx, Array & values){ values_serializations[col_idx]->deserializeBinary(values[col_idx], buf); }; + deserialize = [&](size_t col_idx, Array & values){ values_serializations[col_idx]->deserializeBinary(values[col_idx], buf, {}); }; break; } case 1: { - deserialize = [&](size_t col_idx, Array & values){ promoted_values_serializations[col_idx]->deserializeBinary(values[col_idx], buf); }; + deserialize = [&](size_t col_idx, Array & values){ promoted_values_serializations[col_idx]->deserializeBinary(values[col_idx], buf, {}); }; break; } } @@ -341,7 +341,7 @@ public: for (size_t i = 0; i < size; ++i) { Field key; - keys_serialization->deserializeBinary(key, buf); + keys_serialization->deserializeBinary(key, buf, {}); Array values; values.resize(values_types.size()); diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index 6e26e3cd8d8..f7e703cdaa4 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -79,8 +79,6 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query function_node->resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType()); function_node->getArguments().getNodes() = { argument }; - function_node->getArguments().getNodes() = { argument }; - if (!parameters.empty()) { QueryTreeNodes parameter_nodes; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 
4c2074fcfeb..a1fc09bac39 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2843,6 +2843,14 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const } } + /** Nested subqueries cannot access outer subqueries table expressions from JOIN tree because + * that can prevent resolution of table expression from CTE. + * + * Example: WITH a AS (SELECT number FROM numbers(1)), b AS (SELECT number FROM a) SELECT * FROM a as l, b as r; + */ + if (identifier_lookup.isTableExpressionLookup()) + identifier_resolve_settings.allow_to_check_join_tree = false; + while (scope_to_check != nullptr) { auto lookup_result = tryResolveIdentifier(identifier_lookup, *scope_to_check, identifier_resolve_settings); diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 8342749e230..9b5711d5595 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -166,7 +166,8 @@ void BackupWriterS3::copyObjectImpl( auto outcome = client->CopyObject(request); - if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == "EntityTooLarge") + if (!outcome.IsSuccess() && (outcome.GetError().GetExceptionName() == "EntityTooLarge" + || outcome.GetError().GetExceptionName() == "InvalidRequest")) { // Can't come here with MinIO, MinIO allows single part upload for large objects. copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata); return; diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 185c23a479e..244a51669a1 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -96,6 +96,7 @@ RestorerFromBackup::RestorerFromBackup( , on_cluster_first_sync_timeout(context->getConfigRef().getUInt64("backups.on_cluster_first_sync_timeout", 180000)) , create_table_timeout(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000)) , log(&Poco::Logger::get("RestorerFromBackup")) + , tables_dependencies("RestorerFromBackup") { } @@ -133,6 +134,7 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode) /// Create tables using the create queries read from the backup. setStage(Stage::CREATING_TABLES); + removeUnresolvedDependencies(); createTables(); /// All what's left is to insert data to tables. @@ -341,10 +343,11 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name TableInfo & res_table_info = table_infos[table_name]; res_table_info.create_table_query = create_table_query; res_table_info.is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); - res_table_info.dependencies = getDependenciesSetFromCreateQuery(context->getGlobalContext(), table_name, create_table_query); res_table_info.has_data = backup->hasFiles(data_path_in_backup); res_table_info.data_path_in_backup = data_path_in_backup; + tables_dependencies.addDependencies(table_name, getDependenciesFromCreateQuery(context->getGlobalContext(), table_name, create_table_query)); + if (partitions) { if (!res_table_info.partitions) @@ -622,21 +625,62 @@ void RestorerFromBackup::checkDatabase(const String & database_name) } } +void RestorerFromBackup::removeUnresolvedDependencies() +{ + auto need_exclude_dependency = [this](const StorageID & table_id) + { + /// Table will be restored. 
+ if (table_infos.contains(table_id.getQualifiedName())) + return false; + + /// Table exists and it already exists + if (!DatabaseCatalog::instance().isTableExist(table_id, context)) + { + LOG_WARNING( + log, + "Tables {} in backup depend on {}, but seems like {} is not in the backup and does not exist. " + "Will try to ignore that and restore tables", + fmt::join(tables_dependencies.getDependents(table_id), ", "), + table_id, + table_id); + } + + size_t num_dependencies, num_dependents; + tables_dependencies.getNumberOfAdjacents(table_id, num_dependencies, num_dependents); + if (num_dependencies || !num_dependents) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Table {} in backup doesn't have dependencies and dependent tables as it expected to. It's a bug", + table_id); + + return true; /// Exclude this dependency. + }; + + tables_dependencies.removeTablesIf(need_exclude_dependency); + + if (tables_dependencies.getNumberOfTables() != table_infos.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of tables to be restored is not as expected. It's a bug"); + + if (tables_dependencies.hasCyclicDependencies()) + { + LOG_WARNING( + log, + "Tables {} in backup have cyclic dependencies: {}. Will try to ignore that and restore tables", + fmt::join(tables_dependencies.getTablesWithCyclicDependencies(), ", "), + tables_dependencies.describeCyclicDependencies()); + } +} + void RestorerFromBackup::createTables() { - while (true) + /// We need to create tables considering their dependencies. + auto tables_to_create = tables_dependencies.getTablesSortedByDependency(); + for (const auto & table_id : tables_to_create) { - /// We need to create tables considering their dependencies. - auto tables_to_create = findTablesWithoutDependencies(); - if (tables_to_create.empty()) - break; /// We've already created all the tables. - - for (const auto & table_name : tables_to_create) - { - createTable(table_name); - checkTable(table_name); - insertDataToTable(table_name); - } + auto table_name = table_id.getQualifiedName(); + createTable(table_name); + checkTable(table_name); + insertDataToTable(table_name); } } @@ -752,62 +796,6 @@ void RestorerFromBackup::insertDataToTable(const QualifiedTableName & table_name } } -/// Returns the list of tables without dependencies or those which dependencies have been created before. -std::vector RestorerFromBackup::findTablesWithoutDependencies() const -{ - std::vector tables_without_dependencies; - bool all_tables_created = true; - - for (const auto & [key, table_info] : table_infos) - { - if (table_info.storage) - continue; - - /// Found a table which is not created yet. - all_tables_created = false; - - /// Check if all dependencies have been created before. - bool all_dependencies_met = true; - for (const auto & dependency : table_info.dependencies) - { - auto it = table_infos.find(dependency); - if ((it != table_infos.end()) && !it->second.storage) - { - all_dependencies_met = false; - break; - } - } - - if (all_dependencies_met) - tables_without_dependencies.push_back(key); - } - - if (!tables_without_dependencies.empty()) - return tables_without_dependencies; - - if (all_tables_created) - return {}; - - /// Cyclic dependency? We'll try to create those tables anyway but probably it's going to fail. 
- std::vector tables_with_cyclic_dependencies; - for (const auto & [key, table_info] : table_infos) - { - if (!table_info.storage) - tables_with_cyclic_dependencies.push_back(key); - } - - /// Only show a warning here, proper exception will be thrown later on creating those tables. - LOG_WARNING( - log, - "Some tables have cyclic dependency from each other: {}", - boost::algorithm::join( - tables_with_cyclic_dependencies - | boost::adaptors::transformed([](const QualifiedTableName & table_name) -> String { return table_name.getFullName(); }), - ", ")); - - return tables_with_cyclic_dependencies; -} - void RestorerFromBackup::addDataRestoreTask(DataRestoreTask && new_task) { if (current_stage == Stage::INSERTING_DATA_TO_TABLES) diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index b081e16e2ce..93b5a6c7694 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -94,6 +95,7 @@ private: void createDatabase(const String & database_name) const; void checkDatabase(const String & database_name); + void removeUnresolvedDependencies(); void createTables(); void createTable(const QualifiedTableName & table_name); void checkTable(const QualifiedTableName & table_name); @@ -114,7 +116,6 @@ private: { ASTPtr create_table_query; bool is_predefined_table = false; - std::unordered_set dependencies; bool has_data = false; std::filesystem::path data_path_in_backup; std::optional partitions; @@ -123,11 +124,10 @@ private: TableLockHolder table_lock; }; - std::vector findTablesWithoutDependencies() const; - String current_stage; std::unordered_map database_infos; std::map table_infos; + TablesDependencyGraph tables_dependencies; std::vector data_restore_tasks; std::unique_ptr access_restorer; bool access_restored = false; diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index 04d904d0a00..afaaf11b26a 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -236,7 +236,7 @@ int IBridge::main(const std::vector & /*args*/) SensitiveDataMasker::setInstance(std::make_unique(config(), "query_masking_rules")); auto server = HTTPServer( - context, + std::make_shared(context), getHandlerFactoryPtr(context), server_pool, socket, diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 8ddd0334396..6d6ce2e006c 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -148,7 +148,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts) socket->setReceiveTimeout(timeouts.receive_timeout); socket->setSendTimeout(timeouts.send_timeout); socket->setNoDelay(true); - if (timeouts.tcp_keep_alive_timeout.totalSeconds()) + int tcp_keep_alive_timeout_in_sec = timeouts.tcp_keep_alive_timeout.totalSeconds(); + if (tcp_keep_alive_timeout_in_sec) { socket->setKeepAlive(true); socket->setOption(IPPROTO_TCP, @@ -157,7 +158,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) #else TCP_KEEPIDLE // __APPLE__ #endif - , timeouts.tcp_keep_alive_timeout); + , tcp_keep_alive_timeout_in_sec); } in = std::make_shared(*socket); diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp similarity index 73% rename from src/Interpreters/AsynchronousMetrics.cpp rename to src/Common/AsynchronousMetrics.cpp index 291bca4277d..d4626d317c7 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -1,28 +1,16 @@ -#include -#include -#include -#include -#include -#include 
-#include -#include +#include #include #include #include #include #include -#include #include #include #include -#include -#include -#include -#include +#include #include #include #include -#include #include #include @@ -68,15 +56,11 @@ static std::unique_ptr openFileIfExists(const std::stri AsynchronousMetrics::AsynchronousMetrics( - ContextPtr global_context_, int update_period_seconds, - int heavy_metrics_update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) - : WithContext(global_context_) - , update_period(update_period_seconds) - , heavy_metric_update_period(heavy_metrics_update_period_seconds) - , protocol_server_metrics_func(protocol_server_metrics_func_) + : update_period(update_period_seconds) , log(&Poco::Logger::get("AsynchronousMetrics")) + , protocol_server_metrics_func(protocol_server_metrics_func_) { #if defined(OS_LINUX) openFileIfExists("/proc/meminfo", meminfo); @@ -360,22 +344,6 @@ void AsynchronousMetrics::run() } } - -template -static void calculateMax(Max & max, T x) -{ - if (Max(x) > max) - max = x; -} - -template -static void calculateMaxAndSum(Max & max, Sum & sum, T x) -{ - sum += x; - if (Max(x) > max) - max = x; -} - #if USE_JEMALLOC uint64_t updateJemallocEpoch() { @@ -575,91 +543,6 @@ void AsynchronousMetrics::update(TimePoint update_time) "The difference in time the thread for calculation of the asynchronous metrics was scheduled to wake up and the time it was in fact, woken up." " A proxy-indicator of overall system latency and responsiveness." }; - if (auto mark_cache = getContext()->getMarkCache()) - { - new_values["MarkCacheBytes"] = { mark_cache->weight(), "Total size of mark cache in bytes" }; - new_values["MarkCacheFiles"] = { mark_cache->count(), "Total number of mark files cached in the mark cache" }; - } - - if (auto uncompressed_cache = getContext()->getUncompressedCache()) - { - new_values["UncompressedCacheBytes"] = { uncompressed_cache->weight(), - "Total size of uncompressed cache in bytes. Uncompressed cache does not usually improve the performance and should be mostly avoided." }; - new_values["UncompressedCacheCells"] = { uncompressed_cache->count(), - "Total number of entries in the uncompressed cache. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." }; - } - - if (auto index_mark_cache = getContext()->getIndexMarkCache()) - { - new_values["IndexMarkCacheBytes"] = { index_mark_cache->weight(), "Total size of mark cache for secondary indices in bytes." }; - new_values["IndexMarkCacheFiles"] = { index_mark_cache->count(), "Total number of mark files cached in the mark cache for secondary indices." }; - } - - if (auto index_uncompressed_cache = getContext()->getIndexUncompressedCache()) - { - new_values["IndexUncompressedCacheBytes"] = { index_uncompressed_cache->weight(), - "Total size of uncompressed cache in bytes for secondary indices. Uncompressed cache does not usually improve the performance and should be mostly avoided." }; - new_values["IndexUncompressedCacheCells"] = { index_uncompressed_cache->count(), - "Total number of entries in the uncompressed cache for secondary indices. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." }; - } - - if (auto mmap_cache = getContext()->getMMappedFileCache()) - { - new_values["MMapCacheCells"] = { mmap_cache->count(), - "The number of files opened with `mmap` (mapped in memory)." 
- " This is used for queries with the setting `local_filesystem_read_method` set to `mmap`." - " The files opened with `mmap` are kept in the cache to avoid costly TLB flushes."}; - } - - { - auto caches = FileCacheFactory::instance().getAll(); - size_t total_bytes = 0; - size_t total_files = 0; - - for (const auto & [_, cache_data] : caches) - { - total_bytes += cache_data->cache->getUsedCacheSize(); - total_files += cache_data->cache->getFileSegmentsNum(); - } - - new_values["FilesystemCacheBytes"] = { total_bytes, - "Total bytes in the `cache` virtual filesystem. This cache is hold on disk." }; - new_values["FilesystemCacheFiles"] = { total_files, - "Total number of cached file segments in the `cache` virtual filesystem. This cache is hold on disk." }; - } - -#if USE_ROCKSDB - if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache()) - { - new_values["MergeTreeMetadataCacheSize"] = { metadata_cache->getEstimateNumKeys(), - "The size of the metadata cache for tables. This cache is experimental and not used in production." }; - } -#endif - -#if USE_EMBEDDED_COMPILER - if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache()) - { - new_values["CompiledExpressionCacheBytes"] = { compiled_expression_cache->weight(), - "Total bytes used for the cache of JIT-compiled code." }; - new_values["CompiledExpressionCacheCount"] = { compiled_expression_cache->count(), - "Total entries in the cache of JIT-compiled code." }; - } -#endif - - new_values["Uptime"] = { getContext()->getUptimeSeconds(), - "The server uptime in seconds. It includes the time spent for server initialization before accepting connections." }; - - if (const auto stats = getHashTablesCacheStatistics()) - { - new_values["HashTableStatsCacheEntries"] = { stats->entries, - "The number of entries in the cache of hash table sizes." - " The cache for hash table sizes is used for predictive optimization of GROUP BY." }; - new_values["HashTableStatsCacheHits"] = { stats->hits, - "The number of times the prediction of a hash table size was correct." }; - new_values["HashTableStatsCacheMisses"] = { stats->misses, - "The number of times the prediction of a hash table size was incorrect." }; - } - #if defined(OS_LINUX) || defined(OS_FREEBSD) MemoryStatisticsOS::Data memory_statistics_data = memory_stat.get(); #endif @@ -1519,165 +1402,7 @@ void AsynchronousMetrics::update(TimePoint update_time) } #endif - /// Free space in filesystems at data path and logs path. { - auto stat = getStatVFS(getContext()->getPath()); - - new_values["FilesystemMainPathTotalBytes"] = { stat.f_blocks * stat.f_frsize, - "The size of the volume where the main ClickHouse path is mounted, in bytes." }; - new_values["FilesystemMainPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize, - "Available bytes on the volume where the main ClickHouse path is mounted." }; - new_values["FilesystemMainPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize, - "Used bytes on the volume where the main ClickHouse path is mounted." }; - new_values["FilesystemMainPathTotalINodes"] = { stat.f_files, - "The total number of inodes on the volume where the main ClickHouse path is mounted. If it is less than 25 million, it indicates a misconfiguration." }; - new_values["FilesystemMainPathAvailableINodes"] = { stat.f_favail, - "The number of available inodes on the volume where the main ClickHouse path is mounted. 
If it is close to zero, it indicates a misconfiguration, and you will get 'no space left on device' even when the disk is not full." }; - new_values["FilesystemMainPathUsedINodes"] = { stat.f_files - stat.f_favail, - "The number of used inodes on the volume where the main ClickHouse path is mounted. This value mostly corresponds to the number of files." }; - } - - { - /// Current working directory of the server is the directory with logs. - auto stat = getStatVFS("."); - - new_values["FilesystemLogsPathTotalBytes"] = { stat.f_blocks * stat.f_frsize, - "The size of the volume where ClickHouse logs path is mounted, in bytes. It's recommended to have at least 10 GB for logs." }; - new_values["FilesystemLogsPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize, - "Available bytes on the volume where ClickHouse logs path is mounted. If this value approaches zero, you should tune the log rotation in the configuration file." }; - new_values["FilesystemLogsPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize, - "Used bytes on the volume where ClickHouse logs path is mounted." }; - new_values["FilesystemLogsPathTotalINodes"] = { stat.f_files, - "The total number of inodes on the volume where ClickHouse logs path is mounted." }; - new_values["FilesystemLogsPathAvailableINodes"] = { stat.f_favail, - "The number of available inodes on the volume where ClickHouse logs path is mounted." }; - new_values["FilesystemLogsPathUsedINodes"] = { stat.f_files - stat.f_favail, - "The number of used inodes on the volume where ClickHouse logs path is mounted." }; - } - - /// Free and total space on every configured disk. - { - DisksMap disks_map = getContext()->getDisksMap(); - for (const auto & [name, disk] : disks_map) - { - auto total = disk->getTotalSpace(); - - /// Some disks don't support information about the space. - if (!total) - continue; - - auto available = disk->getAvailableSpace(); - auto unreserved = disk->getUnreservedSpace(); - - new_values[fmt::format("DiskTotal_{}", name)] = { total, - "The total size in bytes of the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; - new_values[fmt::format("DiskUsed_{}", name)] = { total - available, - "Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." }; - new_values[fmt::format("DiskAvailable_{}", name)] = { available, - "Available bytes on the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; - new_values[fmt::format("DiskUnreserved_{}", name)] = { unreserved, - "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems can show a large value like 16 EiB." 
}; - } - } - - { - auto databases = DatabaseCatalog::instance().getDatabases(); - - size_t max_queue_size = 0; - size_t max_inserts_in_queue = 0; - size_t max_merges_in_queue = 0; - - size_t sum_queue_size = 0; - size_t sum_inserts_in_queue = 0; - size_t sum_merges_in_queue = 0; - - size_t max_absolute_delay = 0; - size_t max_relative_delay = 0; - - size_t max_part_count_for_partition = 0; - - size_t number_of_databases = databases.size(); - size_t total_number_of_tables = 0; - - size_t total_number_of_bytes = 0; - size_t total_number_of_rows = 0; - size_t total_number_of_parts = 0; - - for (const auto & db : databases) - { - /// Check if database can contain MergeTree tables - if (!db.second->canContainMergeTreeTables()) - continue; - - for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) - { - ++total_number_of_tables; - const auto & table = iterator->table(); - if (!table) - continue; - - if (MergeTreeData * table_merge_tree = dynamic_cast(table.get())) - { - const auto & settings = getContext()->getSettingsRef(); - - calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountAndSizeForPartition().first); - total_number_of_bytes += table_merge_tree->totalBytes(settings).value(); - total_number_of_rows += table_merge_tree->totalRows(settings).value(); - total_number_of_parts += table_merge_tree->getPartsCount(); - } - - if (StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast(table.get())) - { - StorageReplicatedMergeTree::Status status; - table_replicated_merge_tree->getStatus(status, false); - - calculateMaxAndSum(max_queue_size, sum_queue_size, status.queue.queue_size); - calculateMaxAndSum(max_inserts_in_queue, sum_inserts_in_queue, status.queue.inserts_in_queue); - calculateMaxAndSum(max_merges_in_queue, sum_merges_in_queue, status.queue.merges_in_queue); - - if (!status.is_readonly) - { - try - { - time_t absolute_delay = 0; - time_t relative_delay = 0; - table_replicated_merge_tree->getReplicaDelays(absolute_delay, relative_delay); - - calculateMax(max_absolute_delay, absolute_delay); - calculateMax(max_relative_delay, relative_delay); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__, - "Cannot get replica delay for table: " + backQuoteIfNeed(db.first) + "." + backQuoteIfNeed(iterator->name())); - } - } - } - } - } - - new_values["ReplicasMaxQueueSize"] = { max_queue_size, "Maximum queue size (in the number of operations like get, merge) across Replicated tables." }; - new_values["ReplicasMaxInsertsInQueue"] = { max_inserts_in_queue, "Maximum number of INSERT operations in the queue (still to be replicated) across Replicated tables." }; - new_values["ReplicasMaxMergesInQueue"] = { max_merges_in_queue, "Maximum number of merge operations in the queue (still to be applied) across Replicated tables." }; - - new_values["ReplicasSumQueueSize"] = { sum_queue_size, "Sum queue size (in the number of operations like get, merge) across Replicated tables." }; - new_values["ReplicasSumInsertsInQueue"] = { sum_inserts_in_queue, "Sum of INSERT operations in the queue (still to be replicated) across Replicated tables." }; - new_values["ReplicasSumMergesInQueue"] = { sum_merges_in_queue, "Sum of merge operations in the queue (still to be applied) across Replicated tables." }; - - new_values["ReplicasMaxAbsoluteDelay"] = { max_absolute_delay, "Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. 
A very high value indicates a replica with no data." }; - new_values["ReplicasMaxRelativeDelay"] = { max_relative_delay, "Maximum difference between the replica delay and the delay of the most up-to-date replica of the same table, across Replicated tables." }; - - new_values["MaxPartCountForPartition"] = { max_part_count_for_partition, "Maximum number of parts per partition across all partitions of all tables of MergeTree family. Values larger than 300 indicates misconfiguration, overload, or massive data loading." }; - - new_values["NumberOfDatabases"] = { number_of_databases, "Total number of databases on the server." }; - new_values["NumberOfTables"] = { total_number_of_tables, "Total number of tables summed across the databases on the server, excluding the databases that cannot contain MergeTree tables." - " The excluded database engines are those who generate the set of tables on the fly, like `Lazy`, `MySQL`, `PostgreSQL`, `SQlite`."}; - - new_values["TotalBytesOfMergeTreeTables"] = { total_number_of_bytes, "Total amount of bytes (compressed, including data and indices) stored in all tables of MergeTree family." }; - new_values["TotalRowsOfMergeTreeTables"] = { total_number_of_rows, "Total amount of rows (records) stored in all tables of MergeTree family." }; - new_values["TotalPartsOfMergeTreeTables"] = { total_number_of_parts, "Total amount of data parts in all tables of MergeTree family." - " Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key." }; - auto get_metric_name_doc = [](const String & name) -> std::pair { static std::map> metric_map = @@ -1691,7 +1416,9 @@ void AsynchronousMetrics::update(TimePoint update_time) {"mysql_port", {"MySQLThreads", "Number of threads in the server of the MySQL compatibility protocol."}}, {"postgresql_port", {"PostgreSQLThreads", "Number of threads in the server of the PostgreSQL compatibility protocol."}}, {"grpc_port", {"GRPCThreads", "Number of threads in the server of the GRPC protocol."}}, - {"prometheus.port", {"PrometheusThreads", "Number of threads in the server of the Prometheus endpoint. Note: prometheus endpoints can be also used via the usual HTTP/HTTPs ports."}} + {"prometheus.port", {"PrometheusThreads", "Number of threads in the server of the Prometheus endpoint. 
Note: prometheus endpoints can be also used via the usual HTTP/HTTPs ports."}}, + {"keeper_server.tcp_port", {"KeeperTCPThreads", "Number of threads in the server of the Keeper TCP protocol (without TLS)."}}, + {"keeper_server.tcp_port_secure", {"KeeperTCPSecureThreads", "Number of threads in the server of the Keeper TCP protocol (with TLS)."}} }; auto it = metric_map.find(name); if (it == metric_map.end()) @@ -1707,102 +1434,14 @@ void AsynchronousMetrics::update(TimePoint update_time) new_values[name_doc.first] = { server_metric.current_threads, name_doc.second }; } } -#if USE_NURAFT - { - auto keeper_dispatcher = getContext()->tryGetKeeperDispatcher(); - if (keeper_dispatcher) - { - size_t is_leader = 0; - size_t is_follower = 0; - size_t is_observer = 0; - size_t is_standalone = 0; - size_t znode_count = 0; - size_t watch_count = 0; - size_t ephemerals_count = 0; - size_t approximate_data_size = 0; - size_t key_arena_size = 0; - size_t latest_snapshot_size = 0; - size_t open_file_descriptor_count = 0; - size_t max_file_descriptor_count = 0; - size_t followers = 0; - size_t synced_followers = 0; - size_t zxid = 0; - size_t session_with_watches = 0; - size_t paths_watched = 0; - size_t snapshot_dir_size = 0; - size_t log_dir_size = 0; - - if (keeper_dispatcher->isServerActive()) - { - auto keeper_info = keeper_dispatcher -> getKeeper4LWInfo(); - is_standalone = static_cast(keeper_info.is_standalone); - is_leader = static_cast(keeper_info.is_leader); - is_observer = static_cast(keeper_info.is_observer); - is_follower = static_cast(keeper_info.is_follower); - - zxid = keeper_info.last_zxid; - const auto & state_machine = keeper_dispatcher->getStateMachine(); - znode_count = state_machine.getNodesCount(); - watch_count = state_machine.getTotalWatchesCount(); - ephemerals_count = state_machine.getTotalEphemeralNodesCount(); - approximate_data_size = state_machine.getApproximateDataSize(); - key_arena_size = state_machine.getKeyArenaSize(); - latest_snapshot_size = state_machine.getLatestSnapshotBufSize(); - session_with_watches = state_machine.getSessionsWithWatchesCount(); - paths_watched = state_machine.getWatchedPathsCount(); - snapshot_dir_size = keeper_dispatcher->getSnapDirSize(); - log_dir_size = keeper_dispatcher->getLogDirSize(); - - #if defined(__linux__) || defined(__APPLE__) - open_file_descriptor_count = getCurrentProcessFDCount(); - max_file_descriptor_count = getMaxFileDescriptorCount(); - #endif - - if (keeper_info.is_leader) - { - followers = keeper_info.follower_count; - synced_followers = keeper_info.synced_follower_count; - } - } - - new_values["KeeperIsLeader"] = { is_leader, "1 if ClickHouse Keeper is a leader, 0 otherwise." }; - new_values["KeeperIsFollower"] = { is_follower, "1 if ClickHouse Keeper is a follower, 0 otherwise." }; - new_values["KeeperIsObserver"] = { is_observer, "1 if ClickHouse Keeper is an observer, 0 otherwise." }; - new_values["KeeperIsStandalone"] = { is_standalone, "1 if ClickHouse Keeper is in a standalone mode, 0 otherwise." }; - - new_values["KeeperZnodeCount"] = { znode_count, "The number of nodes (data entries) in ClickHouse Keeper." }; - new_values["KeeperWatchCount"] = { watch_count, "The number of watches in ClickHouse Keeper." }; - new_values["KeeperEphemeralsCount"] = { ephemerals_count, "The number of ephemeral nodes in ClickHouse Keeper." }; - - new_values["KeeperApproximateDataSize"] = { approximate_data_size, "The approximate data size of ClickHouse Keeper, in bytes." 
}; - new_values["KeeperKeyArenaSize"] = { key_arena_size, "The size in bytes of the memory arena for keys in ClickHouse Keeper." }; - new_values["KeeperLatestSnapshotSize"] = { latest_snapshot_size, "The uncompressed size in bytes of the latest snapshot created by ClickHouse Keeper." }; - - new_values["KeeperOpenFileDescriptorCount"] = { open_file_descriptor_count, "The number of open file descriptors in ClickHouse Keeper." }; - new_values["KeeperMaxFileDescriptorCount"] = { max_file_descriptor_count, "The maximum number of open file descriptors in ClickHouse Keeper." }; - - new_values["KeeperFollowers"] = { followers, "The number of followers of ClickHouse Keeper." }; - new_values["KeeperSyncedFollowers"] = { synced_followers, "The number of followers of ClickHouse Keeper who are also in-sync." }; - new_values["KeeperZxid"] = { zxid, "The current transaction id number (zxid) in ClickHouse Keeper." }; - new_values["KeeperSessionWithWatches"] = { session_with_watches, "The number of client sessions of ClickHouse Keeper having watches." }; - new_values["KeeperPathsWatched"] = { paths_watched, "The number of different paths watched by the clients of ClickHouse Keeper." }; - new_values["KeeperSnapshotDirSize"] = { snapshot_dir_size, "The size of the snapshots directory of ClickHouse Keeper, in bytes." }; - new_values["KeeperLogDirSize"] = { log_dir_size, "The size of the logs directory of ClickHouse Keeper, in bytes." }; - } - } -#endif - - updateHeavyMetricsIfNeeded(current_time, update_time, new_values); /// Add more metrics as you wish. + updateImpl(new_values, update_time, current_time); + new_values["AsynchronousMetricsCalculationTimeSpent"] = { watch.elapsedSeconds(), "Time in seconds spent for calculation of asynchronous metrics (this is the overhead of asynchronous metrics)." }; - /// Log the new metrics. - if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog()) - { - asynchronous_metric_log->addValues(new_values); - } + logImpl(new_values); first_run = false; @@ -1811,75 +1450,4 @@ void AsynchronousMetrics::update(TimePoint update_time) values = new_values; } -void AsynchronousMetrics::updateDetachedPartsStats() -{ - DetachedPartsStats current_values{}; - - for (const auto & db : DatabaseCatalog::instance().getDatabases()) - { - if (!db.second->canContainMergeTreeTables()) - continue; - - for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) - { - const auto & table = iterator->table(); - if (!table) - continue; - - if (MergeTreeData * table_merge_tree = dynamic_cast(table.get())) - { - for (const auto & detached_part: table_merge_tree->getDetachedParts()) - { - if (!detached_part.valid_name) - continue; - - if (detached_part.prefix.empty()) - ++current_values.detached_by_user; - - ++current_values.count; - } - } - } - } - - detached_parts_stats = current_values; -} - -void AsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_time, TimePoint update_time, AsynchronousMetricValues & new_values) -{ - const auto time_after_previous_update = current_time - heavy_metric_previous_update_time; - const bool update_heavy_metric = time_after_previous_update >= heavy_metric_update_period || first_run; - - if (update_heavy_metric) - { - heavy_metric_previous_update_time = update_time; - - Stopwatch watch; - - /// Test shows that listing 100000 entries consuming around 0.15 sec. 
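For context on the removals in this hunk: every metric that needs a server Context (caches, filesystem and disk stats, database and replica counters, detached parts, Keeper state) is dropped from the shared implementation, which now only keeps the generic update loop and exposes virtual updateImpl()/logImpl() hooks, as the header diff below shows; a Keeper-specific subclass appears later in this patch, and a server-side counterpart is implied but not shown in this section. A minimal sketch of that template-method split, with invented class and metric names, assuming this is the intended shape:

```cpp
#include <iostream>
#include <map>
#include <string>

/// Not the real classes: the base owns the periodic update and the context-free metrics,
/// while anything that needs a server or Keeper context is collected through a virtual hook.
using MetricValues = std::map<std::string, double>;

class AsynchronousMetricsBase
{
public:
    virtual ~AsynchronousMetricsBase() = default;

    MetricValues update()
    {
        MetricValues values;
        values["Jitter"] = 0.0;   /// common, context-free metrics stay in the base
        updateImpl(values);       /// subclass adds server- or keeper-specific metrics
        logImpl(values);          /// optional logging hook
        return values;
    }

protected:
    virtual void updateImpl(MetricValues & values) = 0;
    virtual void logImpl(MetricValues &) {}   /// no-op by default
};

class ServerLikeMetrics : public AsynchronousMetricsBase
{
protected:
    void updateImpl(MetricValues & values) override
    {
        values["NumberOfDatabases"] = 4;   /// would query the database catalog in real code
    }
    void logImpl(MetricValues & values) override
    {
        std::cout << "collected " << values.size() << " metrics\n";
    }
};

int main()
{
    ServerLikeMetrics metrics;
    metrics.update();
}
```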
- updateDetachedPartsStats(); - - watch.stop(); - - /// Normally heavy metrics don't delay the rest of the metrics calculation - /// otherwise log the warning message - auto log_level = std::make_pair(DB::LogsLevel::trace, Poco::Message::PRIO_TRACE); - if (watch.elapsedSeconds() > (update_period.count() / 2.)) - log_level = std::make_pair(DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG); - else if (watch.elapsedSeconds() > (update_period.count() / 4. * 3)) - log_level = std::make_pair(DB::LogsLevel::warning, Poco::Message::PRIO_WARNING); - LOG_IMPL(log, log_level.first, log_level.second, - "Update heavy metrics. " - "Update period {} sec. " - "Update heavy metrics period {} sec. " - "Heavy metrics calculation elapsed: {} sec.", - update_period.count(), - heavy_metric_update_period.count(), - watch.elapsedSeconds()); - } - - new_values["NumberOfDetachedParts"] = { detached_parts_stats.count, "The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed." }; - new_values["NumberOfDetachedByUserParts"] = { detached_parts_stats.detached_by_user, "The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed." }; -} - } diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h similarity index 90% rename from src/Interpreters/AsynchronousMetrics.h rename to src/Common/AsynchronousMetrics.h index 22ed2e862ea..54c84734eb3 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -55,17 +54,15 @@ struct ProtocolServerMetrics * All the values are either gauge type (like the total number of tables, the current memory usage). * Or delta-counters representing some accumulation during the interval of time. */ -class AsynchronousMetrics : WithContext +class AsynchronousMetrics { public: using ProtocolServerMetricsFunc = std::function()>; AsynchronousMetrics( - ContextPtr global_context_, int update_period_seconds, - int heavy_metrics_update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); - ~AsynchronousMetrics(); + virtual ~AsynchronousMetrics(); /// Separate method allows to initialize the `servers` variable beforehand. void start(); @@ -75,12 +72,22 @@ public: /// Returns copy of all values. AsynchronousMetricValues getValues() const; -private: +protected: using Duration = std::chrono::seconds; using TimePoint = std::chrono::system_clock::time_point; const Duration update_period; - const Duration heavy_metric_update_period; + + /// Some values are incremental and we have to calculate the difference. + /// On first run we will only collect the values to subtract later. + bool first_run = true; + TimePoint previous_update_time; + + Poco::Logger * log; +private: + virtual void updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) = 0; + virtual void logImpl(AsynchronousMetricValues &) {} + ProtocolServerMetricsFunc protocol_server_metrics_func; mutable std::mutex mutex; @@ -88,20 +95,6 @@ private: bool quit {false}; AsynchronousMetricValues values; - /// Some values are incremental and we have to calculate the difference. 
- /// On first run we will only collect the values to subtract later. - bool first_run = true; - TimePoint previous_update_time; - TimePoint heavy_metric_previous_update_time; - - struct DetachedPartsStats - { - size_t count; - size_t detached_by_user; - }; - - DetachedPartsStats detached_parts_stats{}; - #if defined(OS_LINUX) || defined(OS_FREEBSD) MemoryStatisticsOS memory_stat; #endif @@ -212,11 +205,6 @@ private: void run(); void update(TimePoint update_time); - - void updateDetachedPartsStats(); - void updateHeavyMetricsIfNeeded(TimePoint current_time, TimePoint update_time, AsynchronousMetricValues & new_values); - - Poco::Logger * log; }; } diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 2f8aa487621..84f063f9555 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -1204,6 +1204,11 @@ public: return res; } + template + inline DateTimeComponents toDateTimeComponents(DateOrTime v) const + { + return toDateTimeComponents(lut[toLUTIndex(v)].date); + } inline UInt64 toNumYYYYMMDDhhmmss(Time t) const { diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index e312a84d0f5..1b76fef1db4 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -637,8 +637,9 @@ M(666, CANNOT_USE_CACHE) \ M(667, NOT_INITIALIZED) \ M(668, INVALID_STATE) \ - M(669, UNKNOWN_NAMED_COLLECTION) \ + M(669, NAMED_COLLECTION_DOESNT_EXIST) \ M(670, NAMED_COLLECTION_ALREADY_EXISTS) \ + M(671, NAMED_COLLECTION_IS_IMMUTABLE) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index f556b255fc2..27d0adcf24f 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -178,7 +178,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT if (unlikely(current_profiler_limit && will_be > current_profiler_limit)) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceCollector::collect(DB::TraceType::Memory, StackTrace(), size); + DB::TraceSender::send(DB::TraceType::Memory, StackTrace(), {.size = size}); setOrRaiseProfilerLimit((will_be + profiler_step - 1) / profiler_step * profiler_step); allocation_traced = true; } @@ -187,7 +187,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), size); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size}); allocation_traced = true; } @@ -305,7 +305,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT if (peak_updated && allocation_traced) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceCollector::collect(DB::TraceType::MemoryPeak, StackTrace(), will_be); + DB::TraceSender::send(DB::TraceType::MemoryPeak, StackTrace(), {.size = will_be}); } if (auto * loaded_next = parent.load(std::memory_order_relaxed)) @@ -361,7 +361,7 @@ void MemoryTracker::free(Int64 size) if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); - DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), -size); + DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); 
} Int64 accounted_size = size; diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 90d24ec027e..e4e718e7ebc 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -1,5 +1,6 @@ #include #include +#include /// Available events. Add something here as you wish. @@ -433,6 +434,15 @@ The server successfully detected this situation and will download merged part fr M(KeeperSnapshotApplysFailed, "Number of failed snapshot applying")\ M(KeeperReadSnapshot, "Number of snapshot read(serialization)")\ M(KeeperSaveSnapshot, "Number of snapshot save")\ + M(KeeperCreateRequest, "Number of create requests")\ + M(KeeperRemoveRequest, "Number of remove requests")\ + M(KeeperSetRequest, "Number of set requests")\ + M(KeeperCheckRequest, "Number of check requests")\ + M(KeeperMultiRequest, "Number of multi requests")\ + M(KeeperMultiReadRequest, "Number of multi read requests")\ + M(KeeperGetRequest, "Number of get requests")\ + M(KeeperListRequest, "Number of list requests")\ + M(KeeperExistsRequest, "Number of exists requests")\ \ M(OverflowBreak, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'break' and the result is incomplete.") \ M(OverflowThrow, "Number of times, data processing was cancelled by query complexity limitation with setting '*_overflow_mode' = 'throw' and exception was thrown.") \ @@ -514,15 +524,29 @@ const char * getDocumentation(Event event) return strings[event]; } - Event end() { return END; } - void increment(Event event, Count amount) { DB::CurrentThread::getProfileEvents().increment(event, amount); } +void Counters::increment(Event event, Count amount) +{ + Counters * current = this; + bool send_to_trace_log = false; + + do + { + send_to_trace_log |= current->trace_profile_events; + current->counters[event].fetch_add(amount, std::memory_order_relaxed); + current = current->parent; + } while (current != nullptr); + + if (unlikely(send_to_trace_log)) + DB::TraceSender::send(DB::TraceType::ProfileEvent, StackTrace(), {.event = event, .increment = amount}); +} + CountersIncrement::CountersIncrement(Counters::Snapshot const & snapshot) { init(); diff --git a/src/Common/ProfileEvents.h b/src/Common/ProfileEvents.h index 6eebb75c5ca..256a17cc080 100644 --- a/src/Common/ProfileEvents.h +++ b/src/Common/ProfileEvents.h @@ -25,10 +25,12 @@ namespace ProfileEvents class Counters { + private: Counter * counters = nullptr; std::unique_ptr counters_holder; /// Used to propagate increments Counters * parent = nullptr; + bool trace_profile_events = false; public: @@ -51,15 +53,7 @@ namespace ProfileEvents return counters[event]; } - inline void increment(Event event, Count amount = 1) - { - Counters * current = this; - do - { - current->counters[event].fetch_add(amount, std::memory_order_relaxed); - current = current->parent; - } while (current != nullptr); - } + void increment(Event event, Count amount = 1); struct Snapshot { @@ -97,6 +91,11 @@ namespace ProfileEvents parent = parent_; } + void setTraceProfileEvents(bool value) + { + trace_profile_events = value; + } + /// Set all counters to zero void resetCounters(); diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index b50e0c0ab49..14a6a06088c 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -1,7 +1,7 @@ #include "QueryProfiler.h" #include -#include +#include #include #include #include @@ -66,7 +66,7 @@ namespace const auto signal_context = *reinterpret_cast(context); const 
StackTrace stack_trace(signal_context); - TraceCollector::collect(trace_type, stack_trace, 0); + TraceSender::send(trace_type, stack_trace, {}); ProfileEvents::increment(ProfileEvents::QueryProfilerRuns); errno = saved_errno; diff --git a/src/Common/TraceSender.cpp b/src/Common/TraceSender.cpp index ad88e508d06..64d7b2b0eaf 100644 --- a/src/Common/TraceSender.cpp +++ b/src/Common/TraceSender.cpp @@ -14,7 +14,7 @@ namespace /// The performance test query ids can be surprisingly long like /// `aggregating_merge_tree_simple_aggregate_function_string.query100.profile100`, /// so make some allowance for them as well. - constexpr size_t QUERY_ID_MAX_LEN = 128; + constexpr size_t QUERY_ID_MAX_LEN = 100; static_assert(QUERY_ID_MAX_LEN <= std::numeric_limits::max()); } @@ -23,7 +23,7 @@ namespace DB LazyPipeFDs TraceSender::pipe; -void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Int64 size) +void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Extras extras) { constexpr size_t buf_size = sizeof(char) /// TraceCollector stop flag + sizeof(UInt8) /// String size @@ -32,12 +32,14 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Int + sizeof(StackTrace::FramePointers) /// Collected stack trace, maximum capacity + sizeof(TraceType) /// trace type + sizeof(UInt64) /// thread_id - + sizeof(Int64); /// size + + sizeof(Int64) /// size + + sizeof(ProfileEvents::Event) /// event + + sizeof(ProfileEvents::Count); /// increment /// Write should be atomic to avoid overlaps /// (since recursive collect() is possible) static_assert(PIPE_BUF >= 512); - static_assert(buf_size <= 512, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512"); + static_assert(buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512"); char buffer[buf_size]; WriteBufferFromFileDescriptorDiscardOnFailure out(pipe.fds_rw[1], buf_size, buffer); @@ -71,7 +73,9 @@ void TraceSender::send(TraceType trace_type, const StackTrace & stack_trace, Int writePODBinary(trace_type, out); writePODBinary(thread_id, out); - writePODBinary(size, out); + writePODBinary(extras.size, out); + writePODBinary(extras.event, out); + writePODBinary(extras.increment, out); out.next(); } diff --git a/src/Common/TraceSender.h b/src/Common/TraceSender.h index a93e605a6e7..21b44b651dd 100644 --- a/src/Common/TraceSender.h +++ b/src/Common/TraceSender.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include class StackTrace; @@ -17,6 +18,7 @@ enum class TraceType : uint8_t Memory, MemorySample, MemoryPeak, + ProfileEvent, }; /// This is the second part of TraceCollector, that sends stacktrace to the pipe. @@ -24,10 +26,18 @@ enum class TraceType : uint8_t class TraceSender { public: + struct Extras + { + /// size - for memory tracing is the amount of memory allocated; for other trace types it is 0. + Int64 size{}; + /// Event type and increment for 'ProfileEvent' trace type; for other trace types defaults. + ProfileEvents::Event event{ProfileEvents::end()}; + ProfileEvents::Count increment{}; + }; + /// Collect a stack trace. This method is signal safe. /// Precondition: the TraceCollector object must be created. - /// size - for memory tracing is the amount of memory allocated; for other trace types it is 0. 
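The TraceSender change replaces the bare Int64 size argument with an Extras struct carrying the allocation size plus a ProfileEvents event and increment, enlarges the serialized record accordingly, and restates the invariant that the whole record must fit into PIPE_BUF so a single pipe write stays atomic (the shorter QUERY_ID_MAX_LEN presumably keeps the total under the 512-byte lower bound). A small self-contained sketch of that invariant, with an invented field layout and helper names rather than the real wire format:

```cpp
#include <cstdint>
#include <cstring>
#include <limits.h>   /// PIPE_BUF
#include <unistd.h>   /// pipe(), write()

/// Illustrative extras, mirroring the idea of size + event + increment.
struct TraceRecordExtras
{
    int64_t size = 0;        /// allocation size for memory traces
    uint64_t event = 0;      /// profile event id for ProfileEvent traces
    uint64_t increment = 0;  /// counter increment for ProfileEvent traces
};

constexpr size_t trace_record_size =
    sizeof(uint8_t)               /// trace type
    + sizeof(uint64_t)            /// thread id
    + sizeof(TraceRecordExtras);  /// size/event/increment

/// Writes of at most PIPE_BUF bytes to a pipe are atomic, so records from different
/// threads (including recursive sends from signal handlers) cannot interleave.
static_assert(trace_record_size <= PIPE_BUF,
              "A single trace record must fit into PIPE_BUF to keep the pipe write atomic");

ssize_t sendTraceRecord(int pipe_write_fd, uint8_t trace_type, uint64_t thread_id, TraceRecordExtras extras)
{
    char buffer[trace_record_size];
    char * pos = buffer;
    std::memcpy(pos, &trace_type, sizeof(trace_type)); pos += sizeof(trace_type);
    std::memcpy(pos, &thread_id, sizeof(thread_id));   pos += sizeof(thread_id);
    std::memcpy(pos, &extras, sizeof(extras));
    return write(pipe_write_fd, buffer, sizeof(buffer));   /// sizeof(buffer) <= PIPE_BUF
}

int main()
{
    int fds[2];
    if (pipe(fds) != 0)
        return 1;
    /// Illustrative values only; the real trace type and event ids come from the enums above.
    sendTraceRecord(fds[1], /*trace_type=*/1, /*thread_id=*/1, {.size = 0, .event = 42, .increment = 1});
}
```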
- static void send(TraceType trace_type, const StackTrace & stack_trace, Int64 size); + static void send(TraceType trace_type, const StackTrace & stack_trace, Extras extras); private: friend class TraceCollector; diff --git a/src/Common/XMLUtils.cpp b/src/Common/XMLUtils.cpp index 3d15400461e..db84e00adce 100644 --- a/src/Common/XMLUtils.cpp +++ b/src/Common/XMLUtils.cpp @@ -42,15 +42,15 @@ public: return s; } - template + template static ValueType getValue(const Node * node, const std::string & path, - const std::optional & default_value, const ParseFunction & parse_function) + const ValueType & default_value, const ParseFunction & parse_function) { const auto * value_node = node->getNodeByPath(path); if (!value_node) { - if (default_value) - return *default_value; + if constexpr (ReturnDefault) + return default_value; else throw Poco::NotFoundException(path); } @@ -59,34 +59,64 @@ public: }; -std::string getString(const Node * node, const std::string & path, const std::optional & default_value) +std::string getString(const Node * node, const std::string & path) { - return ParseHelper::getValue(node, path, default_value, ParseHelper::parseString); + return ParseHelper::getValue(node, path, {}, ParseHelper::parseString); } -Int64 getInt64(const Node * node, const std::string & path, const std::optional & default_value) +std::string getString(const Node * node, const std::string & path, const std::string & default_value) { - return ParseHelper::getValue(node, path, default_value, ParseHelper::parseInt64); + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseString); } -UInt64 getUInt64(const Node * node, const std::string & path, const std::optional & default_value) +Int64 getInt64(const Node * node, const std::string & path) { - return ParseHelper::getValue(node, path, default_value, ParseHelper::parseUInt64); + return ParseHelper::getValue(node, path, {}, ParseHelper::parseInt64); } -int getInt(const Node * node, const std::string & path, const std::optional & default_value) +Int64 getInt64(const Node * node, const std::string & path, Int64 default_value) { - return ParseHelper::getValue(node, path, default_value, ParseHelper::parseInt); + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseInt64); } -unsigned getUInt(const Node * node, const std::string & path, const std::optional & default_value) +UInt64 getUInt64(const Node * node, const std::string & path) { - return ParseHelper::getValue(node, path, default_value, ParseHelper::parseUInt); + return ParseHelper::getValue(node, path, {}, ParseHelper::parseUInt64); } -bool getBool(const Node * node, const std::string & path, const std::optional & default_value) +UInt64 getUInt64(const Node * node, const std::string & path, UInt64 default_value) { - return ParseHelper::getValue(node, path, default_value, ParseHelper::parseBool); + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseUInt64); +} + +int getInt(const Node * node, const std::string & path) +{ + return ParseHelper::getValue(node, path, {}, ParseHelper::parseInt); +} + +int getInt(const Node * node, const std::string & path, int default_value) +{ + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseInt); +} + +unsigned getUInt(const Node * node, const std::string & path) +{ + return ParseHelper::getValue(node, path, {}, ParseHelper::parseUInt); +} + +unsigned getUInt(const Node * node, const std::string & path, unsigned default_value) +{ + return ParseHelper::getValue(node, path, 
default_value, ParseHelper::parseUInt); +} + +bool getBool(const Node * node, const std::string & path) +{ + return ParseHelper::getValue(node, path, {}, ParseHelper::parseBool); +} + +bool getBool(const Node * node, const std::string & path, bool default_value) +{ + return ParseHelper::getValue(node, path, default_value, ParseHelper::parseBool); } } diff --git a/src/Common/XMLUtils.h b/src/Common/XMLUtils.h index 24efc691704..af9613b67ad 100644 --- a/src/Common/XMLUtils.h +++ b/src/Common/XMLUtils.h @@ -7,17 +7,26 @@ namespace DB:: XMLUtils { +/// Returns root element of the document. Poco::XML::Node * getRootNode(Poco::XML::Document * document); -std::string getString(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); +/// Finds the element in the node's subtree by the specified path and returns its inner text +/// trying to parse it as the requested type. +/// Throws an exception if path is not found. +std::string getString(const Poco::XML::Node * node, const std::string & path); +Int64 getInt64(const Poco::XML::Node * node, const std::string & path); +UInt64 getUInt64(const Poco::XML::Node * node, const std::string & path); +int getInt(const Poco::XML::Node * node, const std::string & path); +unsigned getUInt(const Poco::XML::Node * node, const std::string & path); +bool getBool(const Poco::XML::Node * node, const std::string & path); -Int64 getInt64(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); - -UInt64 getUInt64(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); - -int getInt(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); - -unsigned getUInt(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); - -bool getBool(const Poco::XML::Node * node, const std::string & path, const std::optional & default_value = std::nullopt); +/// Finds the element in the node's subtree by the specified path and returns its inner text +/// trying to parse it as the requested type. +/// Returns the specified default value if path is not found. 
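The XMLUtils rework drops the std::optional default parameters in favour of explicit overload pairs: one getter that throws when the path is missing and one that returns a caller-supplied default, both funneled through a single getValue template selected by a compile-time flag. A compact sketch of the same pattern, using invented names and a std::map in place of the Poco DOM tree:

```cpp
#include <map>
#include <stdexcept>
#include <string>

/// One template does the lookup; a compile-time flag decides whether a missing path
/// falls back to a caller-supplied default or throws. The parsing logic exists once.
template <bool return_default, typename ValueType, typename ParseFunction>
ValueType getValueImpl(
    const std::map<std::string, std::string> & config,
    const std::string & path,
    const ValueType & default_value,
    ParseFunction parse)
{
    auto it = config.find(path);
    if (it == config.end())
    {
        if constexpr (return_default)
            return default_value;
        else
            throw std::runtime_error("Path not found: " + path);
    }
    return parse(it->second);
}

/// Throwing variant: no default parameter at all.
inline int getInt(const std::map<std::string, std::string> & config, const std::string & path)
{
    return getValueImpl<false, int>(config, path, {}, [](const std::string & s) { return std::stoi(s); });
}

/// Defaulted variant: a separate overload instead of std::optional.
inline int getInt(const std::map<std::string, std::string> & config, const std::string & path, int default_value)
{
    return getValueImpl<true, int>(config, path, default_value, [](const std::string & s) { return std::stoi(s); });
}

int main()
{
    std::map<std::string, std::string> config{{"keeper_server/tcp_port", "9181"}};
    int port = getInt(config, "keeper_server/tcp_port");               /// present: parsed as 9181
    int secure = getInt(config, "keeper_server/tcp_port_secure", 0);   /// missing: default 0
    return (port == 9181 && secure == 0) ? 0 : 1;
}
```

The benefit is that call sites read naturally (getUInt(node, path) versus getUInt(node, path, fallback)) without wrapping every default in std::optional.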
+std::string getString(const Poco::XML::Node * node, const std::string & path, const std::string & default_value); +Int64 getInt64(const Poco::XML::Node * node, const std::string & path, Int64 default_value); +UInt64 getUInt64(const Poco::XML::Node * node, const std::string & path, UInt64 default_value); +int getInt(const Poco::XML::Node * node, const std::string & path, int default_value); +unsigned getUInt(const Poco::XML::Node * node, const std::string & path, unsigned default_value); +bool getBool(const Poco::XML::Node * node, const std::string & path, bool default_value); } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index ebab18b5ed7..7cbe7d7b0f2 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -466,7 +466,7 @@ void ZooKeeper::connect( } else { - LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str()); + LOG_INFO(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str()); } } @@ -867,12 +867,12 @@ void ZooKeeper::finalize(bool error_send, bool error_receive, const String & rea /// If some thread (send/receive) already finalizing session don't try to do it bool already_started = finalization_started.test_and_set(); - LOG_TEST(log, "Finalizing session {}: finalization_started={}, queue_finished={}, reason={}", - session_id, already_started, requests_queue.isFinished(), reason); - if (already_started) return; + LOG_INFO(log, "Finalizing session {}: finalization_started={}, queue_finished={}, reason={}", + session_id, already_started, requests_queue.isFinished(), reason); + auto expire_session_if_not_expired = [&] { /// No new requests will appear in queue after finish() diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index cc5f292eae2..c85caa43f19 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -117,7 +117,7 @@ public: WriteBuffer * working_buf = compressed_buffer ? compressed_buffer->getNestedBuffer() : file_buf.get(); - /// Flush working buffer to file system + /// Flush working buffer to file system working_buf->next(); /// Fsync file system if needed @@ -280,6 +280,7 @@ Changelog::Changelog( , force_sync(force_sync_) , log(log_) , compress_logs(compress_logs_) + , write_operations(std::numeric_limits::max()) { /// Load all files in changelog directory namespace fs = std::filesystem; @@ -299,10 +300,13 @@ Changelog::Changelog( LOG_WARNING(log, "No logs exists in {}. It's Ok if it's the first run of clickhouse-keeper.", changelogs_dir.generic_string()); clean_log_thread = ThreadFromGlobalPool([this] { cleanLogThread(); }); + + write_thread = ThreadFromGlobalPool([this] { writeThread(); }); } void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep) { + std::lock_guard writer_lock(writer_mutex); std::optional last_log_read_result; /// Last log has some free space to write @@ -336,7 +340,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin removeAllLogs(); min_log_id = last_commited_log_index; max_log_id = last_commited_log_index == 0 ? 
0 : last_commited_log_index - 1; - rotate(max_log_id + 1); + rotate(max_log_id + 1, writer_lock); return; } else if (changelog_description.from_log_index > start_to_read_from) @@ -427,7 +431,9 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin /// Start new log if we don't initialize writer from previous log. All logs can be "complete". if (!current_writer) - rotate(max_log_id + 1); + rotate(max_log_id + 1, writer_lock); + + initialized = true; } @@ -500,10 +506,11 @@ void Changelog::removeAllLogs() logs.clear(); } -void Changelog::rotate(uint64_t new_start_log_index) +void Changelog::rotate(uint64_t new_start_log_index, std::lock_guard &) { /// Flush previous log - flush(); + if (current_writer) + current_writer->flush(force_sync); /// Start new one ChangelogFileDescription new_description; @@ -540,50 +547,96 @@ ChangelogRecord Changelog::buildRecord(uint64_t index, const LogEntryPtr & log_e return record; } +void Changelog::writeThread() +{ + WriteOperation write_operation; + while (write_operations.pop(write_operation)) + { + assert(initialized); + + if (auto * append_log = std::get_if(&write_operation)) + { + std::lock_guard writer_lock(writer_mutex); + assert(current_writer); + + const auto & current_changelog_description = existing_changelogs[current_writer->getStartIndex()]; + const bool log_is_complete = append_log->index - current_writer->getStartIndex() == current_changelog_description.expectedEntriesCountInLog(); + + if (log_is_complete) + rotate(append_log->index, writer_lock); + + current_writer->appendRecord(buildRecord(append_log->index, append_log->log_entry)); + } + else + { + const auto & flush = std::get(write_operation); + + { + std::lock_guard writer_lock(writer_mutex); + if (current_writer) + current_writer->flush(force_sync); + } + + { + std::lock_guard lock{durable_idx_mutex}; + last_durable_idx = flush.index; + } + + durable_idx_cv.notify_all(); + + // we shouldn't start the raft_server before sending it here + if (auto raft_server_locked = raft_server.lock()) + raft_server_locked->notify_log_append_completion(true); + else + LOG_WARNING(log, "Raft server is not set in LogStore."); + } + } +} + + void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry) { - if (!current_writer) + if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records"); if (logs.empty()) min_log_id = index; - const auto & current_changelog_description = existing_changelogs[current_writer->getStartIndex()]; - const bool log_is_complete = index - current_writer->getStartIndex() == current_changelog_description.expectedEntriesCountInLog(); - - if (log_is_complete) - rotate(index); - - current_writer->appendRecord(buildRecord(index, log_entry)); logs[index] = log_entry; max_log_id = index; + + if (!write_operations.tryPush(AppendLog{index, log_entry})) + LOG_WARNING(log, "Changelog is shut down"); } void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) { - /// This write_at require to overwrite everything in this file and also in previous file(s) - const bool go_to_previous_file = index < current_writer->getStartIndex(); - - if (go_to_previous_file) { - auto index_changelog = existing_changelogs.lower_bound(index); + std::lock_guard lock(writer_mutex); + /// This write_at require to overwrite everything in this file and also in previous file(s) + const bool go_to_previous_file = index < current_writer->getStartIndex(); - ChangelogFileDescription description; - - if 
(index_changelog->first == index) /// exactly this file starts from index - description = index_changelog->second; - else - description = std::prev(index_changelog)->second; - - /// Initialize writer from this log file - current_writer = std::make_unique(description.path, WriteMode::Append, index_changelog->first); - - /// Remove all subsequent files if overwritten something in previous one - auto to_remove_itr = existing_changelogs.upper_bound(index); - for (auto itr = to_remove_itr; itr != existing_changelogs.end();) + if (go_to_previous_file) { - std::filesystem::remove(itr->second.path); - itr = existing_changelogs.erase(itr); + auto index_changelog = existing_changelogs.lower_bound(index); + + ChangelogFileDescription description; + + if (index_changelog->first == index) /// exactly this file starts from index + description = index_changelog->second; + else + description = std::prev(index_changelog)->second; + + /// Initialize writer from this log file + current_writer = std::make_unique(description.path, WriteMode::Append, index_changelog->first); + + /// Remove all subsequent files if overwritten something in previous one + auto to_remove_itr = existing_changelogs.upper_bound(index); + for (auto itr = to_remove_itr; itr != existing_changelogs.end();) + { + std::filesystem::remove(itr->second.path); + itr = existing_changelogs.erase(itr); + } } } @@ -597,6 +650,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) void Changelog::compact(uint64_t up_to_log_index) { + std::lock_guard lock(writer_mutex); LOG_INFO(log, "Compact logs up to log index {}, our max log id is {}", up_to_log_index, max_log_id); bool remove_all_logs = false; @@ -643,7 +697,7 @@ void Changelog::compact(uint64_t up_to_log_index) std::erase_if(logs, [up_to_log_index] (const auto & item) { return item.first <= up_to_log_index; }); if (need_rotate) - rotate(up_to_log_index + 1); + rotate(up_to_log_index + 1, lock); LOG_INFO(log, "Compaction up to {} finished new min index {}, new max index {}", up_to_log_index, min_log_id, max_log_id); } @@ -747,8 +801,19 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer) void Changelog::flush() { - if (current_writer) - current_writer->flush(force_sync); + if (flushAsync()) + { + std::unique_lock lock{durable_idx_mutex}; + durable_idx_cv.wait(lock, [&] { return last_durable_idx == max_log_id; }); + } +} + +bool Changelog::flushAsync() +{ + bool pushed = write_operations.push(Flush{max_log_id}); + if (!pushed) + LOG_WARNING(log, "Changelog is shut down"); + return pushed; } void Changelog::shutdown() @@ -758,6 +823,12 @@ void Changelog::shutdown() if (clean_log_thread.joinable()) clean_log_thread.join(); + + if (!write_operations.isFinished()) + write_operations.finish(); + + if (write_thread.joinable()) + write_thread.join(); } Changelog::~Changelog() @@ -789,4 +860,10 @@ void Changelog::cleanLogThread() } } +void Changelog::setRaftServer(const nuraft::ptr & raft_server_) +{ + assert(raft_server_); + raft_server = raft_server_; +} + } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 9f90f72d9f3..a9464a59003 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -1,8 +1,10 @@ #pragma once #include +#include #include #include +#include #include #include #include @@ -121,6 +123,8 @@ public: /// Fsync latest log to disk and flush buffer void flush(); + bool flushAsync(); + void shutdown(); uint64_t size() const @@ -128,6 +132,14 @@ public: return logs.size(); } + uint64_t 
lastDurableIndex() const + { + std::lock_guard lock{durable_idx_mutex}; + return last_durable_idx; + } + + void setRaftServer(const nuraft::ptr & raft_server_); + /// Fsync log to disk ~Changelog(); @@ -136,7 +148,7 @@ private: static ChangelogRecord buildRecord(uint64_t index, const LogEntryPtr & log_entry); /// Starts new file [new_start_log_index, new_start_log_index + rotate_interval] - void rotate(uint64_t new_start_log_index); + void rotate(uint64_t new_start_log_index, std::lock_guard & writer_lock); /// Currently existing changelogs std::map existing_changelogs; @@ -162,7 +174,7 @@ private: Poco::Logger * log; bool compress_logs; - + std::mutex writer_mutex; /// Current writer for changelog file std::unique_ptr current_writer; /// Mapping log_id -> log_entry @@ -175,6 +187,33 @@ private: /// 128 is enough, even if log is not removed, it's not a problem ConcurrentBoundedQueue log_files_to_delete_queue{128}; ThreadFromGlobalPool clean_log_thread; + + struct AppendLog + { + uint64_t index; + nuraft::ptr log_entry; + }; + + struct Flush + { + uint64_t index; + }; + + using WriteOperation = std::variant; + + void writeThread(); + + ThreadFromGlobalPool write_thread; + ConcurrentBoundedQueue write_operations; + + // last_durable_index needs to be exposed through const getter so we make mutex mutable + mutable std::mutex durable_idx_mutex; + std::condition_variable durable_idx_cv; + uint64_t last_durable_idx{0}; + + nuraft::wptr raft_server; + + bool initialized = false; }; } diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp new file mode 100644 index 00000000000..2d523a26dcc --- /dev/null +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -0,0 +1,127 @@ +#include + +#include + +#include +#include + +namespace DB +{ + +void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousMetricValues & new_values) +{ +#if USE_NURAFT + size_t is_leader = 0; + size_t is_follower = 0; + size_t is_observer = 0; + size_t is_standalone = 0; + size_t znode_count = 0; + size_t watch_count = 0; + size_t ephemerals_count = 0; + size_t approximate_data_size = 0; + size_t key_arena_size = 0; + size_t latest_snapshot_size = 0; + size_t open_file_descriptor_count = 0; + size_t max_file_descriptor_count = 0; + size_t followers = 0; + size_t synced_followers = 0; + size_t zxid = 0; + size_t session_with_watches = 0; + size_t paths_watched = 0; + size_t snapshot_dir_size = 0; + size_t log_dir_size = 0; + + if (keeper_dispatcher.isServerActive()) + { + auto keeper_info = keeper_dispatcher.getKeeper4LWInfo(); + is_standalone = static_cast(keeper_info.is_standalone); + is_leader = static_cast(keeper_info.is_leader); + is_observer = static_cast(keeper_info.is_observer); + is_follower = static_cast(keeper_info.is_follower); + + zxid = keeper_info.last_zxid; + const auto & state_machine = keeper_dispatcher.getStateMachine(); + znode_count = state_machine.getNodesCount(); + watch_count = state_machine.getTotalWatchesCount(); + ephemerals_count = state_machine.getTotalEphemeralNodesCount(); + approximate_data_size = state_machine.getApproximateDataSize(); + key_arena_size = state_machine.getKeyArenaSize(); + latest_snapshot_size = state_machine.getLatestSnapshotBufSize(); + session_with_watches = state_machine.getSessionsWithWatchesCount(); + paths_watched = state_machine.getWatchedPathsCount(); + snapshot_dir_size = keeper_dispatcher.getSnapDirSize(); + log_dir_size = keeper_dispatcher.getLogDirSize(); + +# if defined(__linux__) || 
defined(__APPLE__) + open_file_descriptor_count = getCurrentProcessFDCount(); + max_file_descriptor_count = getMaxFileDescriptorCount(); +# endif + + if (keeper_info.is_leader) + { + followers = keeper_info.follower_count; + synced_followers = keeper_info.synced_follower_count; + } + } + + new_values["KeeperIsLeader"] = { is_leader, "1 if ClickHouse Keeper is a leader, 0 otherwise." }; + new_values["KeeperIsFollower"] = { is_follower, "1 if ClickHouse Keeper is a follower, 0 otherwise." }; + new_values["KeeperIsObserver"] = { is_observer, "1 if ClickHouse Keeper is an observer, 0 otherwise." }; + new_values["KeeperIsStandalone"] = { is_standalone, "1 if ClickHouse Keeper is in a standalone mode, 0 otherwise." }; + + new_values["KeeperZnodeCount"] = { znode_count, "The number of nodes (data entries) in ClickHouse Keeper." }; + new_values["KeeperWatchCount"] = { watch_count, "The number of watches in ClickHouse Keeper." }; + new_values["KeeperEphemeralsCount"] = { ephemerals_count, "The number of ephemeral nodes in ClickHouse Keeper." }; + + new_values["KeeperApproximateDataSize"] = { approximate_data_size, "The approximate data size of ClickHouse Keeper, in bytes." }; + new_values["KeeperKeyArenaSize"] = { key_arena_size, "The size in bytes of the memory arena for keys in ClickHouse Keeper." }; + new_values["KeeperLatestSnapshotSize"] = { latest_snapshot_size, "The uncompressed size in bytes of the latest snapshot created by ClickHouse Keeper." }; + + new_values["KeeperOpenFileDescriptorCount"] = { open_file_descriptor_count, "The number of open file descriptors in ClickHouse Keeper." }; + new_values["KeeperMaxFileDescriptorCount"] = { max_file_descriptor_count, "The maximum number of open file descriptors in ClickHouse Keeper." }; + + new_values["KeeperFollowers"] = { followers, "The number of followers of ClickHouse Keeper." }; + new_values["KeeperSyncedFollowers"] = { synced_followers, "The number of followers of ClickHouse Keeper who are also in-sync." }; + new_values["KeeperZxid"] = { zxid, "The current transaction id number (zxid) in ClickHouse Keeper." }; + new_values["KeeperSessionWithWatches"] = { session_with_watches, "The number of client sessions of ClickHouse Keeper having watches." }; + new_values["KeeperPathsWatched"] = { paths_watched, "The number of different paths watched by the clients of ClickHouse Keeper." }; + new_values["KeeperSnapshotDirSize"] = { snapshot_dir_size, "The size of the snapshots directory of ClickHouse Keeper, in bytes." }; + new_values["KeeperLogDirSize"] = { log_dir_size, "The size of the logs directory of ClickHouse Keeper, in bytes." }; + + auto keeper_log_info = keeper_dispatcher.getKeeperLogInfo(); + + new_values["KeeperLastLogIdx"] = { keeper_log_info.last_log_idx, "Index of the last log stored in ClickHouse Keeper." }; + new_values["KeeperLastLogTerm"] = { keeper_log_info.last_log_term, "Raft term of the last log stored in ClickHouse Keeper." }; + + new_values["KeeperLastCommittedLogIdx"] = { keeper_log_info.last_committed_log_idx, "Index of the last committed log in ClickHouse Keeper." }; + new_values["KeeperTargetCommitLogIdx"] = { keeper_log_info.target_committed_log_idx, "Index until which logs can be committed in ClickHouse Keeper." }; + new_values["KeeperLastSnapshotIdx"] = { keeper_log_info.last_snapshot_idx, "Index of the last log present in the last created snapshot." 
}; + + auto & keeper_connection_stats = keeper_dispatcher.getKeeperConnectionStats(); + + new_values["KeeperMinLatency"] = { keeper_connection_stats.getMinLatency(), "Minimal request latency of ClickHouse Keeper." }; + new_values["KeeperMaxLatency"] = { keeper_connection_stats.getMaxLatency(), "Maximum request latency of ClickHouse Keeper." }; + new_values["KeeperAvgLatency"] = { keeper_connection_stats.getAvgLatency(), "Average request latency of ClickHouse Keeper." }; + new_values["KeeperPacketsReceived"] = { keeper_connection_stats.getPacketsReceived(), "Number of packets received by ClickHouse Keeper." }; + new_values["KeeperPacketsSent"] = { keeper_connection_stats.getPacketsSent(), "Number of packets sent by ClickHouse Keeper." }; +#endif +} + +KeeperAsynchronousMetrics::KeeperAsynchronousMetrics( + TinyContextPtr tiny_context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_) + : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_), tiny_context(std::move(tiny_context_)) +{ +} + +void KeeperAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint /*update_time*/, TimePoint /*current_time*/) +{ +#if USE_NURAFT + { + auto keeper_dispatcher = tiny_context->tryGetKeeperDispatcher(); + if (keeper_dispatcher) + updateKeeperInformation(*keeper_dispatcher, new_values); + } +#endif +} + +} diff --git a/src/Coordination/KeeperAsynchronousMetrics.h b/src/Coordination/KeeperAsynchronousMetrics.h new file mode 100644 index 00000000000..8fa27336bc5 --- /dev/null +++ b/src/Coordination/KeeperAsynchronousMetrics.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class KeeperDispatcher; +void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousMetricValues & new_values); + +class KeeperAsynchronousMetrics : public AsynchronousMetrics +{ +public: + KeeperAsynchronousMetrics( + TinyContextPtr tiny_context_, int update_period_seconds, const ProtocolServerMetricsFunc & protocol_server_metrics_func_); + +private: + TinyContextPtr tiny_context; + + void updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) override; +}; + + +} diff --git a/src/Coordination/KeeperLogStore.cpp b/src/Coordination/KeeperLogStore.cpp index 3787f30626b..ea72022af09 100644 --- a/src/Coordination/KeeperLogStore.cpp +++ b/src/Coordination/KeeperLogStore.cpp @@ -109,7 +109,7 @@ uint64_t KeeperLogStore::size() const void KeeperLogStore::end_of_append_batch(uint64_t /*start_index*/, uint64_t /*count*/) { std::lock_guard lock(changelog_lock); - changelog.flush(); + changelog.flushAsync(); } nuraft::ptr KeeperLogStore::getLatestConfigChange() const @@ -132,4 +132,16 @@ bool KeeperLogStore::flushChangelogAndShutdown() return true; } +uint64_t KeeperLogStore::last_durable_index() +{ + std::lock_guard lock(changelog_lock); + return changelog.lastDurableIndex(); +} + +void KeeperLogStore::setRaftServer(const nuraft::ptr & raft_server) +{ + std::lock_guard lock(changelog_lock); + return changelog.setRaftServer(raft_server); +} + } diff --git a/src/Coordination/KeeperLogStore.h b/src/Coordination/KeeperLogStore.h index e1c66599e0a..260a6b29320 100644 --- a/src/Coordination/KeeperLogStore.h +++ b/src/Coordination/KeeperLogStore.h @@ -62,12 +62,16 @@ public: /// Current log storage size uint64_t size() const; + uint64_t last_durable_index() override; + /// Flush batch of appended entries void end_of_append_batch(uint64_t start_index, uint64_t count) override; /// Get 
entry with latest config in logstore nuraft::ptr getLatestConfigChange() const; + void setRaftServer(const nuraft::ptr & raft_server); + private: mutable std::mutex changelog_lock; Poco::Logger * log; diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 15470115998..ba2b08312a0 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -266,6 +266,7 @@ void KeeperServer::forceRecovery() void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6) { nuraft::raft_params params; + params.parallel_log_appending_ = true; params.heart_beat_interval_ = getValueOrMaxInt32AndLogWarning(coordination_settings->heart_beat_interval_ms.totalMilliseconds(), "heart_beat_interval_ms", log); params.election_timeout_lower_bound_ = getValueOrMaxInt32AndLogWarning( @@ -352,6 +353,8 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co if (!raft_instance) throw Exception(ErrorCodes::RAFT_ERROR, "Cannot allocate RAFT instance"); + state_manager->getLogStore()->setRaftServer(raft_instance); + raft_instance->start_server(init_options.skip_initial_election_timeout_); nuraft::ptr casted_raft_server = raft_instance; @@ -446,8 +449,8 @@ void KeeperServer::shutdownRaftServer() void KeeperServer::shutdown() { - state_manager->flushAndShutDownLogStore(); shutdownRaftServer(); + state_manager->flushAndShutDownLogStore(); state_machine->shutdownStorage(); } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 3153d17899d..fb472201aec 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,19 @@ #include #include +namespace ProfileEvents +{ + extern const Event KeeperCreateRequest; + extern const Event KeeperRemoveRequest; + extern const Event KeeperSetRequest; + extern const Event KeeperCheckRequest; + extern const Event KeeperMultiRequest; + extern const Event KeeperMultiReadRequest; + extern const Event KeeperGetRequest; + extern const Event KeeperListRequest; + extern const Event KeeperExistsRequest; +} + namespace DB { @@ -865,6 +879,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override { + ProfileEvents::increment(ProfileEvents::KeeperCreateRequest); Coordination::ZooKeeperCreateRequest & request = dynamic_cast(*zk_request); std::vector new_deltas; @@ -986,6 +1001,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { + ProfileEvents::increment(ProfileEvents::KeeperGetRequest); Coordination::ZooKeeperGetRequest & request = dynamic_cast(*zk_request); if (request.path == Coordination::keeper_api_version_path) @@ -1040,6 +1056,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override { + ProfileEvents::increment(ProfileEvents::KeeperGetRequest); return processImpl(storage, zxid); } }; @@ -1055,6 +1072,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr 
std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & digest, const KeeperContext & keeper_context) const override { + ProfileEvents::increment(ProfileEvents::KeeperRemoveRequest); Coordination::ZooKeeperRemoveRequest & request = dynamic_cast(*zk_request); std::vector new_deltas; @@ -1145,6 +1163,7 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { + ProfileEvents::increment(ProfileEvents::KeeperExistsRequest); Coordination::ZooKeeperExistsRequest & request = dynamic_cast(*zk_request); if (!storage.uncommitted_state.getNode(request.path)) @@ -1194,6 +1213,7 @@ struct KeeperStorageExistsRequestProcessor final : public KeeperStorageRequestPr Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override { + ProfileEvents::increment(ProfileEvents::KeeperExistsRequest); return processImpl(storage, zxid); } }; @@ -1209,6 +1229,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override { + ProfileEvents::increment(ProfileEvents::KeeperSetRequest); Coordination::ZooKeeperSetRequest & request = dynamic_cast(*zk_request); std::vector new_deltas; @@ -1301,6 +1322,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { + ProfileEvents::increment(ProfileEvents::KeeperListRequest); Coordination::ZooKeeperListRequest & request = dynamic_cast(*zk_request); if (!storage.uncommitted_state.getNode(request.path)) @@ -1387,6 +1409,7 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override { + ProfileEvents::increment(ProfileEvents::KeeperListRequest); return processImpl(storage, zxid); } }; @@ -1402,6 +1425,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { + ProfileEvents::increment(ProfileEvents::KeeperCheckRequest); Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); if (!storage.uncommitted_state.getNode(request.path)) @@ -1463,6 +1487,7 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override { + ProfileEvents::increment(ProfileEvents::KeeperCheckRequest); return processImpl(storage, zxid); } }; @@ -1689,6 +1714,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t session_id, int64_t time, uint64_t & digest, const KeeperContext & keeper_context) const override { + ProfileEvents::increment(ProfileEvents::KeeperMultiRequest); std::vector response_errors; 
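// [Editor's note -- illustrative sketch, not part of the patch.] The counters bumped in these
// preprocess()/processLocal() overrides follow the usual ClickHouse ProfileEvents pattern:
// assuming each event is also registered in src/Common/ProfileEvents.cpp (not shown in this hunk),
// a translation unit only needs an extern declaration and an increment call, e.g.:

#include <Common/ProfileEvents.h>

namespace ProfileEvents
{
    extern const Event KeeperCreateRequest;
}

// Hypothetical helper, shown only to illustrate the declaration + increment pattern used above.
void onKeeperCreateRequest()
{
    ProfileEvents::increment(ProfileEvents::KeeperCreateRequest);
}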
response_errors.reserve(concrete_requests.size()); uint64_t current_digest = digest; @@ -1756,6 +1782,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro Coordination::ZooKeeperResponsePtr processLocal(KeeperStorage & storage, int64_t zxid) const override { + ProfileEvents::increment(ProfileEvents::KeeperMultiReadRequest); Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); Coordination::ZooKeeperMultiResponse & response = dynamic_cast(*response_ptr); diff --git a/programs/keeper/TinyContext.cpp b/src/Coordination/TinyContext.cpp similarity index 98% rename from programs/keeper/TinyContext.cpp rename to src/Coordination/TinyContext.cpp index 09174838c04..967e6b23d70 100644 --- a/programs/keeper/TinyContext.cpp +++ b/src/Coordination/TinyContext.cpp @@ -1,4 +1,4 @@ -#include "TinyContext.h" +#include #include #include diff --git a/programs/keeper/TinyContext.h b/src/Coordination/TinyContext.h similarity index 88% rename from programs/keeper/TinyContext.h rename to src/Coordination/TinyContext.h index 1cbbc725090..b966d445004 100644 --- a/programs/keeper/TinyContext.h +++ b/src/Coordination/TinyContext.h @@ -10,7 +10,7 @@ namespace DB class KeeperDispatcher; -class TinyContext: public std::enable_shared_from_this +class TinyContext : public std::enable_shared_from_this { public: std::shared_ptr getKeeperDispatcher() const; @@ -31,4 +31,6 @@ private: ConfigurationPtr config TSA_GUARDED_BY(keeper_dispatcher_mutex); }; +using TinyContextPtr = std::shared_ptr; + } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index fa4c42dd82a..628fe408d01 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -67,6 +67,7 @@ class CoordinationTest : public ::testing::TestWithParam { protected: DB::KeeperContextPtr keeper_context = std::make_shared(); + Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; }; TEST_P(CoordinationTest, BuildTest) @@ -129,10 +130,13 @@ struct SimpliestRaftServer params.snapshot_distance_ = 1; /// forcefully send snapshots params.client_req_timeout_ = 3000; params.return_method_ = nuraft::raft_params::blocking; + params.parallel_log_appending_ = true; + nuraft::raft_server::init_options opts; + opts.start_server_in_constructor_ = false; raft_instance = launcher.init( state_machine, state_manager, nuraft::cs_new("ToyRaftLogger", DB::LogsLevel::trace), port, - nuraft::asio_service::options{}, params); + nuraft::asio_service::options{}, params, opts); if (!raft_instance) { @@ -140,6 +144,10 @@ struct SimpliestRaftServer _exit(1); } + state_manager->getLogStore()->setRaftServer(raft_instance); + + raft_instance->start_server(false); + std::cout << "init Raft instance " << server_id; for (size_t ii = 0; ii < 20; ++ii) { @@ -207,7 +215,7 @@ TEST_P(CoordinationTest, TestSummingRaft1) while (s1.state_machine->getValue() != 143) { - std::cout << "Waiting s1 to apply entry\n"; + LOG_INFO(log, "Waiting s1 to apply entry"); std::this_thread::sleep_for(std::chrono::milliseconds(100)); } @@ -240,6 +248,15 @@ TEST_P(CoordinationTest, ChangelogTestSimple) EXPECT_EQ(changelog.log_entries(1, 2)->size(), 1); } +namespace +{ +void waitDurableLogs(nuraft::log_store & log_store) +{ + while (log_store.last_durable_index() != log_store.next_slot() - 1) + std::this_thread::sleep_for(std::chrono::milliseconds(200)); +} + +} TEST_P(CoordinationTest, ChangelogTestFile) { @@ -250,6 +267,9 @@ TEST_P(CoordinationTest, 
ChangelogTestFile) auto entry = getLogEntry("hello world", 77); changelog.append(entry); changelog.end_of_append_batch(0, 0); + + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); for (const auto & p : fs::directory_iterator("./logs")) EXPECT_EQ(p.path(), "./logs/changelog_1_5.bin" + params.extension); @@ -261,6 +281,8 @@ TEST_P(CoordinationTest, ChangelogTestFile) changelog.append(entry); changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); } @@ -271,6 +293,7 @@ TEST_P(CoordinationTest, ChangelogReadWrite) ChangelogDirTest test("./logs"); DB::KeeperLogStore changelog("./logs", 1000, true, params.enable_compression); changelog.init(1, 0); + for (size_t i = 0; i < 10; ++i) { auto entry = getLogEntry("hello world", i * 10); @@ -280,6 +303,8 @@ TEST_P(CoordinationTest, ChangelogReadWrite) EXPECT_EQ(changelog.size(), 10); + waitDurableLogs(changelog); + DB::KeeperLogStore changelog_reader("./logs", 1000, true, params.enable_compression); changelog_reader.init(1, 0); EXPECT_EQ(changelog_reader.size(), 10); @@ -315,6 +340,8 @@ TEST_P(CoordinationTest, ChangelogWriteAt) changelog.write_at(7, entry); changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); + EXPECT_EQ(changelog.size(), 7); EXPECT_EQ(changelog.last_entry()->get_term(), 77); EXPECT_EQ(changelog.entry_at(7)->get_term(), 77); @@ -344,6 +371,9 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) changelog.end_of_append_batch(0, 0); EXPECT_EQ(changelog.size(), 7); + + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); @@ -358,6 +388,8 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) } changelog_reader.end_of_append_batch(0, 0); EXPECT_EQ(changelog_reader.size(), 10); + + waitDurableLogs(changelog_reader); EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); @@ -371,6 +403,8 @@ TEST_P(CoordinationTest, ChangelogTestAppendAfterRead) changelog_reader.append(entry); changelog_reader.end_of_append_batch(0, 0); EXPECT_EQ(changelog_reader.size(), 11); + + waitDurableLogs(changelog_reader); EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); @@ -396,6 +430,8 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) } changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); + EXPECT_EQ(changelog.size(), 3); changelog.compact(2); @@ -416,6 +452,8 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) changelog.append(e4); changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); @@ -454,6 +492,8 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperations) EXPECT_EQ(changelog.size(), 10); + waitDurableLogs(changelog); + auto entries = changelog.pack(1, 5); DB::KeeperLogStore apply_changelog("./logs", 100, true, params.enable_compression); @@ -499,6 +539,8 @@ TEST_P(CoordinationTest, ChangelogTestBatchOperationsEmpty) EXPECT_EQ(changelog.size(), 10); + waitDurableLogs(changelog); + auto 
entries = changelog.pack(5, 5); ChangelogDirTest test1("./logs1"); @@ -543,6 +585,8 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) } changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); @@ -561,6 +605,8 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtPreviousFile) EXPECT_EQ(changelog.next_slot(), 8); EXPECT_EQ(changelog.last_entry()->get_term(), 5555); + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); @@ -592,6 +638,8 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) } changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); @@ -610,6 +658,8 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtFileBorder) EXPECT_EQ(changelog.next_slot(), 12); EXPECT_EQ(changelog.last_entry()->get_term(), 5555); + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); @@ -633,7 +683,6 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) ChangelogDirTest test("./logs"); DB::KeeperLogStore changelog("./logs", 5, true, params.enable_compression); changelog.init(1, 0); - for (size_t i = 0; i < 33; ++i) { auto entry = getLogEntry(std::to_string(i) + "_hello_world", i * 10); @@ -641,6 +690,8 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) } changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); @@ -659,6 +710,8 @@ TEST_P(CoordinationTest, ChangelogTestWriteAtAllFiles) EXPECT_EQ(changelog.next_slot(), 2); EXPECT_EQ(changelog.last_entry()->get_term(), 5555); + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_FALSE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); @@ -683,6 +736,8 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) } changelog.end_of_append_batch(0, 0); EXPECT_EQ(changelog.size(), 35); + + waitDurableLogs(changelog); EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); @@ -692,7 +747,6 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) EXPECT_TRUE(fs::exists("./logs/changelog_31_35.bin" + params.extension)); EXPECT_FALSE(fs::exists("./logs/changelog_36_40.bin" + params.extension)); - DB::KeeperLogStore changelog_reader("./logs", 5, true, params.enable_compression); changelog_reader.init(1, 0); @@ -701,6 +755,8 @@ TEST_P(CoordinationTest, ChangelogTestStartNewLogAfterRead) changelog_reader.end_of_append_batch(0, 0); EXPECT_EQ(changelog_reader.size(), 36); + + waitDurableLogs(changelog_reader); 
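// [Editor's note -- clarifying sketch, not part of the patch.] Because end_of_append_batch() now
// only enqueues a Flush operation (KeeperLogStore calls changelog.flushAsync()), entries may not
// be on disk when it returns; the background write thread performs the fsync and then publishes
// the index through last_durable_idx. That is why these tests call waitDurableLogs() before any
// file-existence assertions. A minimal polling helper, generalized from waitDurableLogs() above
// and assuming only a store exposing last_durable_index()/next_slot() as KeeperLogStore does:

#include <chrono>
#include <thread>

template <typename LogStore>
void waitUntilDurable(LogStore & log_store)
{
    // Spin until everything that was appended has been fsynced by the write thread.
    while (log_store.last_durable_index() != log_store.next_slot() - 1)
        std::this_thread::sleep_for(std::chrono::milliseconds(200));
}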
EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); @@ -746,6 +802,8 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) } changelog.end_of_append_batch(0, 0); EXPECT_EQ(changelog.size(), 35); + + waitDurableLogs(changelog); EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); @@ -779,6 +837,8 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate) EXPECT_EQ(changelog_reader.size(), 11); EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); + waitDurableLogs(changelog_reader); + EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_15.bin" + params.extension)); @@ -809,6 +869,7 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) } changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension)); @@ -824,6 +885,9 @@ TEST_P(CoordinationTest, ChangelogTestReadAfterBrokenTruncate2) auto entry = getLogEntry("hello_world", 7777); changelog_reader.append(entry); changelog_reader.end_of_append_batch(0, 0); + + waitDurableLogs(changelog_reader); + EXPECT_EQ(changelog_reader.size(), 1); EXPECT_EQ(changelog_reader.last_entry()->get_term(), 7777); @@ -848,6 +912,7 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles) } changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); EXPECT_TRUE(fs::exists("./logs/changelog_1_20.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_21_40.bin" + params.extension)); @@ -874,6 +939,8 @@ TEST_P(CoordinationTest, ChangelogTestLostFiles2) } changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); + EXPECT_TRUE(fs::exists("./logs/changelog_1_10.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_11_20.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_21_30.bin" + params.extension)); @@ -1330,6 +1397,8 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint changelog.append(entry); changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); + state_machine->pre_commit(i, changelog.entry_at(i)->get_buf()); state_machine->commit(i, changelog.entry_at(i)->get_buf()); bool snapshot_created = false; @@ -1339,7 +1408,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint nuraft::async_result::handler_type when_done = [&snapshot_created] (bool & ret, nuraft::ptr &/*exception*/) { snapshot_created = ret; - std::cerr << "Snapshot finished\n"; + LOG_INFO(&Poco::Logger::get("CoordinationTest"), "Snapshot finished"); }; state_machine->create_snapshot(s, when_done); @@ -1511,6 +1580,8 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) changelog.append(entry); changelog.end_of_append_batch(0, 0); } + + waitDurableLogs(changelog); } @@ -1527,6 +1598,8 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) changelog_1.end_of_append_batch(0, 0); } + waitDurableLogs(changelog_1); + EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin" + params.extension)); 
EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + params.extension)); @@ -1542,6 +1615,8 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) changelog_2.end_of_append_batch(0, 0); } + waitDurableLogs(changelog_2); + changelog_2.compact(105); std::this_thread::sleep_for(std::chrono::microseconds(1000)); @@ -1562,6 +1637,8 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) changelog_3.end_of_append_batch(0, 0); } + waitDurableLogs(changelog_3); + changelog_3.compact(125); std::this_thread::sleep_for(std::chrono::microseconds(1000)); EXPECT_FALSE(fs::exists("./logs/changelog_101_110.bin" + params.extension)); @@ -1609,6 +1686,7 @@ TEST_P(CoordinationTest, TestCompressedLogsMultipleRewrite) changelog.end_of_append_batch(0, 0); } + waitDurableLogs(changelog); DB::KeeperLogStore changelog1("./logs", 100, true, test_params.enable_compression); changelog1.init(0, 3); @@ -1683,43 +1761,47 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesSmooth) auto params = GetParam(); ChangelogDirTest test("./logs"); { - std::cerr << "================First time=====================\n"; + LOG_INFO(log, "================First time====================="); DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); changelog.end_of_append_batch(0, 0); EXPECT_EQ(changelog.next_slot(), 2); + waitDurableLogs(changelog); } { - std::cerr << "================Second time=====================\n"; + LOG_INFO(log, "================Second time====================="); DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); changelog.end_of_append_batch(0, 0); EXPECT_EQ(changelog.next_slot(), 3); + waitDurableLogs(changelog); } { - std::cerr << "================Third time=====================\n"; + LOG_INFO(log, "================Third time====================="); DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); changelog.end_of_append_batch(0, 0); EXPECT_EQ(changelog.next_slot(), 4); + waitDurableLogs(changelog); } { - std::cerr << "================Fourth time=====================\n"; + LOG_INFO(log, "================Fourth time====================="); DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression); changelog.init(1, 0); auto entry = getLogEntry("hello_world", 1000); changelog.append(entry); changelog.end_of_append_batch(0, 0); EXPECT_EQ(changelog.next_slot(), 5); + waitDurableLogs(changelog); } } @@ -1730,7 +1812,7 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) ChangelogDirTest test("./logs"); for (size_t i = 0; i < 36; ++i) { - std::cerr << "================First time=====================\n"; + LOG_INFO(log, "================First time====================="); DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression); changelog.init(1, 0); for (size_t j = 0; j < 7; ++j) @@ -1739,6 +1821,7 @@ TEST_P(CoordinationTest, ChangelogInsertMultipleTimesSmooth) changelog.append(entry); } changelog.end_of_append_batch(0, 0); + waitDurableLogs(changelog); } DB::KeeperLogStore changelog("./logs", 100, true, params.enable_compression); @@ -1750,37 +1833,49 @@ TEST_P(CoordinationTest, ChangelogInsertThreeTimesHard) { auto params = GetParam(); ChangelogDirTest test("./logs"); - std::cerr << "================First 
time=====================\n"; - DB::KeeperLogStore changelog1("./logs", 100, true, params.enable_compression); - changelog1.init(1, 0); - auto entry = getLogEntry("hello_world", 1000); - changelog1.append(entry); - changelog1.end_of_append_batch(0, 0); - EXPECT_EQ(changelog1.next_slot(), 2); + { + LOG_INFO(log, "================First time====================="); + DB::KeeperLogStore changelog1("./logs", 100, true, params.enable_compression); + changelog1.init(1, 0); + auto entry = getLogEntry("hello_world", 1000); + changelog1.append(entry); + changelog1.end_of_append_batch(0, 0); + EXPECT_EQ(changelog1.next_slot(), 2); + waitDurableLogs(changelog1); + } - std::cerr << "================Second time=====================\n"; - DB::KeeperLogStore changelog2("./logs", 100, true, params.enable_compression); - changelog2.init(1, 0); - entry = getLogEntry("hello_world", 1000); - changelog2.append(entry); - changelog2.end_of_append_batch(0, 0); - EXPECT_EQ(changelog2.next_slot(), 3); + { + LOG_INFO(log, "================Second time====================="); + DB::KeeperLogStore changelog2("./logs", 100, true, params.enable_compression); + changelog2.init(1, 0); + auto entry = getLogEntry("hello_world", 1000); + changelog2.append(entry); + changelog2.end_of_append_batch(0, 0); + EXPECT_EQ(changelog2.next_slot(), 3); + waitDurableLogs(changelog2); + } - std::cerr << "================Third time=====================\n"; - DB::KeeperLogStore changelog3("./logs", 100, true, params.enable_compression); - changelog3.init(1, 0); - entry = getLogEntry("hello_world", 1000); - changelog3.append(entry); - changelog3.end_of_append_batch(0, 0); - EXPECT_EQ(changelog3.next_slot(), 4); + { + LOG_INFO(log, "================Third time====================="); + DB::KeeperLogStore changelog3("./logs", 100, true, params.enable_compression); + changelog3.init(1, 0); + auto entry = getLogEntry("hello_world", 1000); + changelog3.append(entry); + changelog3.end_of_append_batch(0, 0); + EXPECT_EQ(changelog3.next_slot(), 4); + waitDurableLogs(changelog3); + } - std::cerr << "================Fourth time=====================\n"; - DB::KeeperLogStore changelog4("./logs", 100, true, params.enable_compression); - changelog4.init(1, 0); - entry = getLogEntry("hello_world", 1000); - changelog4.append(entry); - changelog4.end_of_append_batch(0, 0); - EXPECT_EQ(changelog4.next_slot(), 5); + { + LOG_INFO(log, "================Fourth time====================="); + DB::KeeperLogStore changelog4("./logs", 100, true, params.enable_compression); + changelog4.init(1, 0); + auto entry = getLogEntry("hello_world", 1000); + changelog4.append(entry); + changelog4.end_of_append_batch(0, 0); + EXPECT_EQ(changelog4.next_slot(), 5); + waitDurableLogs(changelog4); + } } TEST_P(CoordinationTest, TestStorageSnapshotEqual) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 874a0bd1773..33af6710999 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -398,6 +398,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. 
If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \ M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \ M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \ \ M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. If timeout is reached and memory is not freed, exception is thrown.", 0) \ \ @@ -582,6 +583,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \ M(Bool, query_plan_optimize_primary_key, true, "Analyze primary key using query plan (instead of AST)", 0) \ M(Bool, query_plan_read_in_order, true, "Use query plan for read-in-order optimisation", 0) \ + M(Bool, query_plan_aggregation_in_order, true, "Use query plan for aggregation-in-order optimisation", 0) \ M(UInt64, regexp_max_matches_per_row, 1000, "Max matches of any single regexp per row, used to safeguard 'extractAllGroupsHorizontal' against consuming too much memory with greedy RE.", 0) \ \ M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \ @@ -782,6 +784,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \ M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \ + M(UInt64, format_binary_max_string_size, 1_GiB, "The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 
0 means there is no limit", 0) \ M(URI, format_avro_schema_registry_url, "", "For AvroConfluent format: Confluent Schema Registry URL.", 0) \ \ M(Bool, output_format_json_quote_64bit_integers, true, "Controls quoting of 64-bit integers in JSON output format.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 7635e121f8e..ee378b295fa 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -78,6 +78,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"22.12", {{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index 811ccb182f3..33fd6017599 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -51,13 +51,13 @@ struct SortColumnDescription SortColumnDescription() = default; explicit SortColumnDescription( - const std::string & column_name_, + std::string column_name_, int direction_ = 1, int nulls_direction_ = 1, const std::shared_ptr & collator_ = nullptr, bool with_fill_ = false, const FillColumnDescription & fill_description_ = {}) - : column_name(column_name_) + : column_name(std::move(column_name_)) , direction(direction_) , nulls_direction(nulls_direction_) , collator(collator_) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index d64b41253f5..ea86a91ac88 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -303,17 +303,17 @@ public: */ /// There is two variants for binary serde. First variant work with Field. - virtual void serializeBinary(const Field & field, WriteBuffer & ostr) const = 0; - virtual void deserializeBinary(Field & field, ReadBuffer & istr) const = 0; + virtual void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const = 0; + virtual void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const = 0; /// Other variants takes a column, to avoid creating temporary Field object. /// Column must be non-constant. /// Serialize one value of a column at specified row number. - virtual void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const = 0; + virtual void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; /// Deserialize one value and insert into a column. /// If method will throw an exception, then column will be in same state as before call to method. - virtual void deserializeBinary(IColumn & column, ReadBuffer & istr) const = 0; + virtual void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; /** Text serialization with escaping but without quoting. 
*/ diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index c8db1a56ed0..7e192595114 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -17,13 +17,13 @@ namespace DB { -void SerializationAggregateFunction::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationAggregateFunction::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { const AggregateFunctionStateData & state = field.get(); writeBinary(state.data, ostr); } -void SerializationAggregateFunction::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationAggregateFunction::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { field = AggregateFunctionStateData(); AggregateFunctionStateData & s = field.get(); @@ -31,12 +31,12 @@ void SerializationAggregateFunction::deserializeBinary(Field & field, ReadBuffer s.name = type_name; } -void SerializationAggregateFunction::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationAggregateFunction::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { function->serialize(assert_cast(column).getData()[row_num], ostr, version); } -void SerializationAggregateFunction::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationAggregateFunction::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { ColumnAggregateFunction & column_concrete = assert_cast(column); diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.h b/src/DataTypes/Serializations/SerializationAggregateFunction.h index 1e32ce5d6f3..4212298bbc1 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.h +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.h @@ -22,11 +22,11 @@ public: : function(function_), type_name(std::move(type_name_)), version(version_) {} /// NOTE These two functions for serializing single values are incompatible with the functions below. 
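// [Editor's note -- usage sketch, not part of the patch; writeSingleValueBinary is a hypothetical
// name.] Every single-value binary (de)serialization entry point now takes FormatSettings, so
// per-query format limits -- e.g. the format_binary_max_string_size setting added above -- can be
// consulted while reading or writing individual values. A call site after this change looks like:

#include <Columns/IColumn.h>
#include <DataTypes/Serializations/ISerialization.h>
#include <Formats/FormatSettings.h>
#include <IO/WriteBuffer.h>

namespace DB
{

// Hypothetical convenience wrapper, shown only to illustrate the extra parameter.
void writeSingleValueBinary(const ISerialization & serialization, const IColumn & column,
                            size_t row_num, WriteBuffer & ostr, const FormatSettings & settings)
{
    serialization.serializeBinary(column, row_num, ostr, settings);
}

}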
- void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 143a3264381..f09589c50c3 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -27,18 +27,18 @@ static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30; static constexpr size_t MAX_ARRAYS_SIZE = 1ULL << 40; -void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { const Array & a = field.get(); writeVarUInt(a.size(), ostr); for (size_t i = 0; i < a.size(); ++i) { - nested->serializeBinary(a[i], ostr); + nested->serializeBinary(a[i], ostr, settings); } } -void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { size_t size; readVarUInt(size, istr); @@ -46,11 +46,11 @@ void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr) con Array & arr = field.get(); arr.reserve(size); for (size_t i = 0; i < size; ++i) - nested->deserializeBinary(arr.emplace_back(), istr); + nested->deserializeBinary(arr.emplace_back(), istr, settings); } -void SerializationArray::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationArray::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const ColumnArray & column_array = assert_cast(column); const ColumnArray::Offsets & offsets = column_array.getOffsets(); @@ -63,11 +63,11 @@ void SerializationArray::serializeBinary(const IColumn & column, size_t row_num, const IColumn & nested_column = column_array.getData(); for (size_t i = offset; i < next_offset; ++i) - nested->serializeBinary(nested_column, i, ostr); + nested->serializeBinary(nested_column, i, ostr, settings); } -void SerializationArray::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationArray::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { ColumnArray & column_array = assert_cast(column); ColumnArray::Offsets & offsets = column_array.getOffsets(); @@ -81,7 +81,7 @@ void SerializationArray::deserializeBinary(IColumn & column, 
ReadBuffer & istr) try { for (; i < size; ++i) - nested->deserializeBinary(nested_column, istr); + nested->deserializeBinary(nested_column, istr, settings); } catch (...) { diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index 860461d667f..a5e10cd22fb 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -13,10 +13,10 @@ private: public: explicit SerializationArray(const SerializationPtr & nested_) : nested(nested_) {} - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override; diff --git a/src/DataTypes/Serializations/SerializationDecimalBase.cpp b/src/DataTypes/Serializations/SerializationDecimalBase.cpp index 00ffd607664..642ea1c7cd8 100644 --- a/src/DataTypes/Serializations/SerializationDecimalBase.cpp +++ b/src/DataTypes/Serializations/SerializationDecimalBase.cpp @@ -12,14 +12,14 @@ namespace DB { template -void SerializationDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { FieldType x = field.get>(); writeBinary(x, ostr); } template -void SerializationDecimalBase::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationDecimalBase::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { const FieldType & x = assert_cast(column).getElement(row_num); writeBinary(x, ostr); @@ -39,7 +39,7 @@ void SerializationDecimalBase::serializeBinaryBulk(const IColumn & column, Wr } template -void SerializationDecimalBase::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationDecimalBase::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { typename FieldType::NativeType x; readBinary(x, istr); @@ -47,7 +47,7 @@ void SerializationDecimalBase::deserializeBinary(Field & field, ReadBuffer & } template -void SerializationDecimalBase::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationDecimalBase::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { typename FieldType::NativeType x; readBinary(x, istr); diff --git a/src/DataTypes/Serializations/SerializationDecimalBase.h b/src/DataTypes/Serializations/SerializationDecimalBase.h index fd3dcb17e35..08f963cedbb 100644 --- a/src/DataTypes/Serializations/SerializationDecimalBase.h +++ 
b/src/DataTypes/Serializations/SerializationDecimalBase.h @@ -20,12 +20,12 @@ public: SerializationDecimalBase(UInt32 precision_, UInt32 scale_) : precision(precision_), scale(scale_) {} - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; }; diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index dd476103108..7f9ebe174fa 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -26,7 +26,7 @@ namespace ErrorCodes static constexpr size_t MAX_STRINGS_SIZE = 1ULL << 30; -void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { const String & s = field.get(); ostr.write(s.data(), std::min(s.size(), n)); @@ -36,7 +36,7 @@ void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer } -void SerializationFixedString::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationFixedString::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { field = String(); String & s = field.get(); @@ -45,13 +45,13 @@ void SerializationFixedString::deserializeBinary(Field & field, ReadBuffer & ist } -void SerializationFixedString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationFixedString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { ostr.write(reinterpret_cast(&assert_cast(column).getChars()[n * row_num]), n); } -void SerializationFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { ColumnFixedString::Chars & data = assert_cast(column).getChars(); size_t old_size = data.size(); diff --git a/src/DataTypes/Serializations/SerializationFixedString.h b/src/DataTypes/Serializations/SerializationFixedString.h index c3c08b20419..3db31ab02cb 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.h +++ b/src/DataTypes/Serializations/SerializationFixedString.h @@ -15,10 +15,10 @@ public: explicit SerializationFixedString(size_t n_) : n(n_) {} size_t getN() const { return n; } - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void 
serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index c70bb1e1465..b3f91c0297b 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -718,22 +718,22 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( column = std::move(mutable_column); } -void SerializationLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - dictionary_type->getDefaultSerialization()->serializeBinary(field, ostr); + dictionary_type->getDefaultSerialization()->serializeBinary(field, ostr, settings); } -void SerializationLowCardinality::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationLowCardinality::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { - dictionary_type->getDefaultSerialization()->deserializeBinary(field, istr); + dictionary_type->getDefaultSerialization()->deserializeBinary(field, istr, settings); } -void SerializationLowCardinality::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationLowCardinality::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - serializeImpl(column, row_num, &ISerialization::serializeBinary, ostr); + serializeImpl(column, row_num, &ISerialization::serializeBinary, ostr, settings); } -void SerializationLowCardinality::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationLowCardinality::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeImpl(column, &ISerialization::deserializeBinary, istr); + deserializeImpl(column, &ISerialization::deserializeBinary, istr, settings); } void SerializationLowCardinality::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index 1d0c3226faf..5f56bcf8108 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -49,10 +49,10 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, 
ReadBuffer & istr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index cd0a99c0c68..61b04e843b2 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -36,7 +36,7 @@ static IColumn & extractNestedColumn(IColumn & column) return assert_cast(column).getNestedColumn(); } -void SerializationMap::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationMap::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { const auto & map = field.get(); writeVarUInt(map.size(), ostr); @@ -44,12 +44,12 @@ void SerializationMap::serializeBinary(const Field & field, WriteBuffer & ostr) { const auto & tuple = elem.safeGet(); assert(tuple.size() == 2); - key->serializeBinary(tuple[0], ostr); - value->serializeBinary(tuple[1], ostr); + key->serializeBinary(tuple[0], ostr, settings); + value->serializeBinary(tuple[1], ostr, settings); } } -void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { size_t size; readVarUInt(size, istr); @@ -59,20 +59,20 @@ void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr) const for (size_t i = 0; i < size; ++i) { Tuple tuple(2); - key->deserializeBinary(tuple[0], istr); - value->deserializeBinary(tuple[1], istr); + key->deserializeBinary(tuple[0], istr, settings); + value->deserializeBinary(tuple[1], istr, settings); map.push_back(std::move(tuple)); } } -void SerializationMap::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationMap::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - nested->serializeBinary(extractNestedColumn(column), row_num, ostr); + nested->serializeBinary(extractNestedColumn(column), row_num, ostr, settings); } -void SerializationMap::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationMap::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - nested->deserializeBinary(extractNestedColumn(column), istr); + nested->deserializeBinary(extractNestedColumn(column), istr, settings); } diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 
864ac1f3a99..556a50fbbc1 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -18,10 +18,10 @@ private: public: SerializationMap(const SerializationPtr & key_type_, const SerializationPtr & value_type_, const SerializationPtr & nested_); - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationNothing.h b/src/DataTypes/Serializations/SerializationNothing.h index e46a1e6ed30..06ce00eb636 100644 --- a/src/DataTypes/Serializations/SerializationNothing.h +++ b/src/DataTypes/Serializations/SerializationNothing.h @@ -19,10 +19,10 @@ private: throw Exception("Serialization is not implemented for type Nothing", ErrorCodes::NOT_IMPLEMENTED); } public: - void serializeBinary(const Field &, WriteBuffer &) const override { throwNoSerialization(); } - void deserializeBinary(Field &, ReadBuffer &) const override { throwNoSerialization(); } - void serializeBinary(const IColumn &, size_t, WriteBuffer &) const override { throwNoSerialization(); } - void deserializeBinary(IColumn &, ReadBuffer &) const override { throwNoSerialization(); } + void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } + void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); } void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); } void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); } diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index c46fde27ddb..8a57c4bc9a1 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -150,7 +150,7 @@ void SerializationNullable::deserializeBinaryBulkWithMultipleStreams( } -void SerializationNullable::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationNullable::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) 
const { if (field.isNull()) { @@ -159,17 +159,17 @@ void SerializationNullable::serializeBinary(const Field & field, WriteBuffer & o else { writeBinary(false, ostr); - nested->serializeBinary(field, ostr); + nested->serializeBinary(field, ostr, settings); } } -void SerializationNullable::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationNullable::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { bool is_null = false; readBinary(is_null, istr); if (!is_null) { - nested->deserializeBinary(field, istr); + nested->deserializeBinary(field, istr, settings); } else { @@ -177,14 +177,14 @@ void SerializationNullable::deserializeBinary(Field & field, ReadBuffer & istr) } } -void SerializationNullable::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationNullable::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const ColumnNullable & col = assert_cast(column); bool is_null = col.isNullAt(row_num); writeBinary(is_null, ostr); if (!is_null) - nested->serializeBinary(col.getNestedColumn(), row_num, ostr); + nested->serializeBinary(col.getNestedColumn(), row_num, ostr, settings); } /// Deserialize value into ColumnNullable. @@ -235,11 +235,11 @@ static ReturnType safeDeserialize( } -void SerializationNullable::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationNullable::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { safeDeserialize(column, *nested, [&istr] { bool is_null = false; readBinary(is_null, istr); return is_null; }, - [this, &istr] (IColumn & nested_column) { nested->deserializeBinary(nested_column, istr); }); + [this, &istr, settings] (IColumn & nested_column) { nested->deserializeBinary(nested_column, istr, settings); }); } diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index 9aabbe299cc..3ec01b46de5 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -45,10 +45,10 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationNumber.cpp 
b/src/DataTypes/Serializations/SerializationNumber.cpp index 10e80d92b30..8cabaec753d 100644 --- a/src/DataTypes/Serializations/SerializationNumber.cpp +++ b/src/DataTypes/Serializations/SerializationNumber.cpp @@ -102,7 +102,7 @@ void SerializationNumber::deserializeTextCSV(IColumn & column, ReadBuffer & i } template -void SerializationNumber::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationNumber::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { /// ColumnVector::ValueType is a narrower type. For example, UInt8, when the Field type is UInt64 typename ColumnVector::ValueType x = static_cast::ValueType>(field.get()); @@ -110,7 +110,7 @@ void SerializationNumber::serializeBinary(const Field & field, WriteBuffer & } template -void SerializationNumber::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationNumber::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { typename ColumnVector::ValueType x; readBinary(x, istr); @@ -118,13 +118,13 @@ void SerializationNumber::deserializeBinary(Field & field, ReadBuffer & istr) } template -void SerializationNumber::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationNumber::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeBinary(assert_cast &>(column).getData()[row_num], ostr); } template -void SerializationNumber::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationNumber::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { typename ColumnVector::ValueType x; readBinary(x, istr); diff --git a/src/DataTypes/Serializations/SerializationNumber.h b/src/DataTypes/Serializations/SerializationNumber.h index 062453b1e19..67ed91848d4 100644 --- a/src/DataTypes/Serializations/SerializationNumber.h +++ b/src/DataTypes/Serializations/SerializationNumber.h @@ -22,10 +22,10 @@ public: void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; /** Format is platform-dependent. 
*/ - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; }; diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 98a94886f67..3e5e1934614 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -376,25 +376,25 @@ void SerializationObject::deserializeBinaryBulkFromTuple( } template -void SerializationObject::serializeBinary(const Field &, WriteBuffer &) const +void SerializationObject::serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject"); } template -void SerializationObject::deserializeBinary(Field &, ReadBuffer &) const +void SerializationObject::deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject"); } template -void SerializationObject::serializeBinary(const IColumn &, size_t, WriteBuffer &) const +void SerializationObject::serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject"); } template -void SerializationObject::deserializeBinary(IColumn &, ReadBuffer &) const +void SerializationObject::deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject"); } diff --git a/src/DataTypes/Serializations/SerializationObject.h b/src/DataTypes/Serializations/SerializationObject.h index 47a7127cd1c..9cf56fcab96 100644 --- a/src/DataTypes/Serializations/SerializationObject.h +++ b/src/DataTypes/Serializations/SerializationObject.h @@ -57,10 +57,10 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void 
deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp index cd09cd7be5a..4d7514271ad 100644 --- a/src/DataTypes/Serializations/SerializationSparse.cpp +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -302,23 +302,23 @@ void SerializationSparse::deserializeBinaryBulkWithMultipleStreams( /// All methods below just wrap nested serialization. -void SerializationSparse::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationSparse::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - nested->serializeBinary(field, ostr); + nested->serializeBinary(field, ostr, settings); } -void SerializationSparse::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationSparse::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { - nested->deserializeBinary(field, istr); + nested->deserializeBinary(field, istr, settings); } -void SerializationSparse::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationSparse::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const auto & column_sparse = assert_cast(column); - nested->serializeBinary(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr); + nested->serializeBinary(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings); } -void SerializationSparse::deserializeBinary(IColumn &, ReadBuffer &) const +void SerializationSparse::deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeBinary' is not implemented for SerializationSparse"); } diff --git a/src/DataTypes/Serializations/SerializationSparse.h b/src/DataTypes/Serializations/SerializationSparse.h index c157fe7ce98..2d31fba2509 100644 --- a/src/DataTypes/Serializations/SerializationSparse.h +++ b/src/DataTypes/Serializations/SerializationSparse.h @@ -61,11 +61,11 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, 
ReadBuffer & istr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index 46acaada72c..c314ed49973 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -25,20 +25,37 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; + extern const int TOO_LARGE_STRING_SIZE; } -void SerializationString::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { const String & s = field.get(); + if (settings.max_binary_string_size && s.size() > settings.max_binary_string_size) + throw Exception( + ErrorCodes::TOO_LARGE_STRING_SIZE, + "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " + "format_binary_max_string_size", + s.size(), + settings.max_binary_string_size); + writeVarUInt(s.size(), ostr); writeString(s, ostr); } -void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { UInt64 size; readVarUInt(size, istr); + if (settings.max_binary_string_size && size > settings.max_binary_string_size) + throw Exception( + ErrorCodes::TOO_LARGE_STRING_SIZE, + "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " + "format_binary_max_string_size", + size, + settings.max_binary_string_size); + field = String(); String & s = field.get(); s.resize(size); @@ -46,15 +63,23 @@ void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr) co } -void SerializationString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { const StringRef & s = assert_cast(column).getDataAt(row_num); + if (settings.max_binary_string_size && s.size > settings.max_binary_string_size) + throw Exception( + ErrorCodes::TOO_LARGE_STRING_SIZE, + "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting " + "format_binary_max_string_size", + s.size, + settings.max_binary_string_size); + writeVarUInt(s.size, ostr); writeString(s, ostr); } -void SerializationString::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationString::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { ColumnString & column_string = assert_cast(column); ColumnString::Chars & data = column_string.getChars(); @@ -62,6 +87,13 @@ void SerializationString::deserializeBinary(IColumn & column, ReadBuffer & istr) UInt64 size; readVarUInt(size, istr); + if (settings.max_binary_string_size && size > settings.max_binary_string_size) + throw Exception( + ErrorCodes::TOO_LARGE_STRING_SIZE, + "Too large string size: {}. The maximum is: {}. 
To increase the maximum, use setting " + "format_binary_max_string_size", + size, + settings.max_binary_string_size); size_t old_chars_size = data.size(); size_t offset = old_chars_size + size + 1; diff --git a/src/DataTypes/Serializations/SerializationString.h b/src/DataTypes/Serializations/SerializationString.h index ee5de2c18f1..f27a5116c15 100644 --- a/src/DataTypes/Serializations/SerializationString.h +++ b/src/DataTypes/Serializations/SerializationString.h @@ -8,10 +8,10 @@ namespace DB class SerializationString final : public ISerialization { public: - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 8ffb1fe86bc..0ed2b034985 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -29,17 +29,17 @@ static inline const IColumn & extractElementColumn(const IColumn & column, size_ return assert_cast(column).getColumn(idx); } -void SerializationTuple::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationTuple::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { const auto & tuple = field.get(); for (size_t element_index = 0; element_index < elems.size(); ++element_index) { const auto & serialization = elems[element_index]; - serialization->serializeBinary(tuple[element_index], ostr); + serialization->serializeBinary(tuple[element_index], ostr, settings); } } -void SerializationTuple::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationTuple::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { const size_t size = elems.size(); @@ -47,15 +47,15 @@ void SerializationTuple::deserializeBinary(Field & field, ReadBuffer & istr) con Tuple & tuple = field.get(); tuple.reserve(size); for (size_t i = 0; i < size; ++i) - elems[i]->deserializeBinary(tuple.emplace_back(), istr); + elems[i]->deserializeBinary(tuple.emplace_back(), istr, settings); } -void SerializationTuple::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationTuple::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { for (size_t element_index = 0; element_index < elems.size(); ++element_index) { const auto & serialization = elems[element_index]; - serialization->serializeBinary(extractElementColumn(column, element_index), row_num, 
ostr); + serialization->serializeBinary(extractElementColumn(column, element_index), row_num, ostr, settings); } } @@ -97,12 +97,12 @@ static void addElementSafe(size_t num_elems, IColumn & column, F && impl) } } -void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { addElementSafe(elems.size(), column, [&] { for (size_t i = 0; i < elems.size(); ++i) - elems[i]->deserializeBinary(extractElementColumn(column, i), istr); + elems[i]->deserializeBinary(extractElementColumn(column, i), istr, settings); }); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index db0339bc996..5c177e3f0e8 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -17,10 +17,10 @@ public: { } - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 1fa9219a643..7b184aeee96 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -82,25 +82,25 @@ void SerializationUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr, } -void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const { UUID x = field.get(); writeBinary(x, ostr); } -void SerializationUUID::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationUUID::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const { UUID x; readBinary(x, istr); field = NearestFieldType(x); } -void SerializationUUID::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationUUID::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeBinary(assert_cast &>(column).getData()[row_num], ostr); } -void SerializationUUID::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationUUID::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { UUID x; readBinary(x, istr); diff --git 
a/src/DataTypes/Serializations/SerializationUUID.h b/src/DataTypes/Serializations/SerializationUUID.h index 061e58f4670..da8c15f7279 100644 --- a/src/DataTypes/Serializations/SerializationUUID.h +++ b/src/DataTypes/Serializations/SerializationUUID.h @@ -19,10 +19,10 @@ public: void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; }; diff --git a/src/DataTypes/Serializations/SerializationWrapper.cpp b/src/DataTypes/Serializations/SerializationWrapper.cpp index c83de614751..ce598142ab2 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.cpp +++ b/src/DataTypes/Serializations/SerializationWrapper.cpp @@ -66,24 +66,24 @@ void SerializationWrapper::deserializeBinaryBulk(IColumn & column, ReadBuffer & nested_serialization->deserializeBinaryBulk(column, istr, limit, avg_value_size_hint); } -void SerializationWrapper::serializeBinary(const Field & field, WriteBuffer & ostr) const +void SerializationWrapper::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const { - nested_serialization->serializeBinary(field, ostr); + nested_serialization->serializeBinary(field, ostr, settings); } -void SerializationWrapper::deserializeBinary(Field & field, ReadBuffer & istr) const +void SerializationWrapper::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const { - nested_serialization->deserializeBinary(field, istr); + nested_serialization->deserializeBinary(field, istr, settings); } -void SerializationWrapper::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +void SerializationWrapper::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - nested_serialization->serializeBinary(column, row_num, ostr); + nested_serialization->serializeBinary(column, row_num, ostr, settings); } -void SerializationWrapper::deserializeBinary(IColumn & column, ReadBuffer & istr) const +void SerializationWrapper::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - nested_serialization->deserializeBinary(column, istr); + nested_serialization->deserializeBinary(column, istr, settings); } void SerializationWrapper::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git 
a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 46941f150e1..c141ff5e38d 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -55,11 +55,11 @@ public: void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; - void serializeBinary(const Field & field, WriteBuffer & ostr) const override; - void deserializeBinary(Field & field, ReadBuffer & istr) const override; + void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; - void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; - void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index bedaec75565..525f4fb7b12 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -1,198 +1,239 @@ #include #include #include -#include #include #include #include #include #include +#include #include + namespace DB { -using TableLoadingDependenciesVisitor = DDLDependencyVisitor::Visitor; +namespace +{ + /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query. + void visitCreateQuery(const ASTCreateQuery & create, DDLDependencyVisitor::Data & data) + { + QualifiedTableName to_table{create.to_table_id.database_name, create.to_table_id.table_name}; + if (!to_table.table.empty()) + { + /// TO target_table (for materialized views) + if (to_table.database.empty()) + to_table.database = data.default_database; + data.dependencies.emplace(to_table); + } -TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast) + QualifiedTableName as_table{create.as_database, create.as_table}; + if (!as_table.table.empty()) + { + /// AS table_name + if (as_table.database.empty()) + as_table.database = data.default_database; + data.dependencies.emplace(as_table); + } + } + + /// ASTTableExpression represents a reference to a table in SELECT query. + /// DDLDependencyVisitor should handle ASTTableExpression because some CREATE queries can contain SELECT queries after AS + /// (for example, CREATE VIEW). 
+ void visitTableExpression(const ASTTableExpression & expr, DDLDependencyVisitor::Data & data) + { + if (!expr.database_and_table_name) + return; + + const ASTIdentifier * identifier = dynamic_cast(expr.database_and_table_name.get()); + if (!identifier) + return; + + auto table_identifier = identifier->createTable(); + if (!table_identifier) + return; + + QualifiedTableName qualified_name{table_identifier->getDatabaseName(), table_identifier->shortName()}; + if (qualified_name.table.empty()) + return; + + if (qualified_name.database.empty()) + { + /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here. + qualified_name.database = data.default_database; + } + + data.dependencies.emplace(qualified_name); + } + + /// Extracts a table name with optional database written in the form db_name.table_name (as identifier) or 'db_name.table_name' (as string). + void extractQualifiedTableNameFromArgument(const ASTFunction & function, DDLDependencyVisitor::Data & data, size_t arg_idx) + { + /// Just ignore incorrect arguments, proper exception will be thrown later + if (!function.arguments || function.arguments->children.size() <= arg_idx) + return; + + QualifiedTableName qualified_name; + + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return; + + const auto * arg = expr_list->children[arg_idx].get(); + if (const auto * literal = arg->as()) + { + if (literal->value.getType() != Field::Types::String) + return; + + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get()); + /// Just return if name if invalid + if (!maybe_qualified_name) + return; + + qualified_name = std::move(*maybe_qualified_name); + } + else if (const auto * identifier = dynamic_cast(arg)) + { + /// ASTIdentifier or ASTTableIdentifier + auto table_identifier = identifier->createTable(); + /// Just return if table identified is invalid + if (!table_identifier) + return; + + qualified_name.database = table_identifier->getDatabaseName(); + qualified_name.table = table_identifier->shortName(); + } + else + { + /// Just return because we don't validate AST in this function. + return; + } + + if (qualified_name.database.empty()) + { + /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here. + qualified_name.database = data.default_database; + } + data.dependencies.emplace(std::move(qualified_name)); + } + + /// Extracts a table name with database written in the form 'db_name', 'table_name' (two strings). 
+ void extractDatabaseAndTableNameFromArguments(const ASTFunction & function, DDLDependencyVisitor::Data & data, size_t database_arg_idx, size_t table_arg_idx) + { + /// Just ignore incorrect arguments, proper exception will be thrown later + if (!function.arguments || (function.arguments->children.size() <= database_arg_idx) + || (function.arguments->children.size() <= table_arg_idx)) + return; + + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return; + + const auto * database_literal = expr_list->children[database_arg_idx]->as(); + const auto * table_name_literal = expr_list->children[table_arg_idx]->as(); + + if (!database_literal || !table_name_literal || (database_literal->value.getType() != Field::Types::String) + || (table_name_literal->value.getType() != Field::Types::String)) + return; + + QualifiedTableName qualified_name{database_literal->value.get(), table_name_literal->value.get()}; + if (qualified_name.table.empty()) + return; + + if (qualified_name.database.empty()) + qualified_name.database = data.default_database; + + data.dependencies.emplace(qualified_name); + } + + void visitFunction(const ASTFunction & function, DDLDependencyVisitor::Data & data) + { + if (function.name == "joinGet" || function.name == "dictHas" || function.name == "dictIsIn" || function.name.starts_with("dictGet")) + { + /// dictGet('dict_name', attr_names, id_expr) + /// dictHas('dict_name', id_expr) + /// joinGet(join_storage_table_name, `value_column`, join_keys) + extractQualifiedTableNameFromArgument(function, data, 0); + } + else if (function.name == "in" || function.name == "notIn" || function.name == "globalIn" || function.name == "globalNotIn") + { + /// in(x, table_name) - function for evaluating (x IN table_name) + extractQualifiedTableNameFromArgument(function, data, 1); + } + else if (function.name == "dictionary") + { + /// dictionary(dict_name) + extractQualifiedTableNameFromArgument(function, data, 0); + } + } + + void visitTableEngine(const ASTFunction & table_engine, DDLDependencyVisitor::Data & data) + { + if (table_engine.name == "Dictionary") + extractQualifiedTableNameFromArgument(table_engine, data, 0); + + if (table_engine.name == "Buffer") + extractDatabaseAndTableNameFromArguments(table_engine, data, 0, 1); + } + + void visitDictionaryDef(const ASTDictionary & dictionary, DDLDependencyVisitor::Data & data) + { + if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements) + return; + + auto config = getDictionaryConfigurationFromAST(data.create_query->as(), data.global_context); + auto info = getInfoIfClickHouseDictionarySource(config, data.global_context); + + if (!info || !info->is_local) + return; + + if (info->table_name.database.empty()) + info->table_name.database = data.default_database; + data.dependencies.emplace(std::move(info->table_name)); + } +} + + +TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast) { assert(global_context == global_context->getGlobalContext()); - TableLoadingDependenciesVisitor::Data data; + DDLDependencyVisitor::Data data; + data.table_name = table_name; data.default_database = global_context->getCurrentDatabase(); data.create_query = ast; data.global_context = global_context; - TableLoadingDependenciesVisitor visitor{data}; + DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(ast); - data.dependencies.erase(table); + data.dependencies.erase(data.table_name); return data.dependencies; } 
void DDLDependencyVisitor::visit(const ASTPtr & ast, Data & data) { - /// Looking for functions in column default expressions and dictionary source definition - if (const auto * function = ast->as()) - visit(*function, data); - else if (const auto * dict_source = ast->as()) - visit(*dict_source, data); - else if (const auto * storage = ast->as()) - visit(*storage, data); + if (auto * create = ast->as()) + { + visitCreateQuery(*create, data); + } + else if (auto * dictionary = ast->as()) + { + visitDictionaryDef(*dictionary, data); + } + else if (auto * expr = ast->as()) + { + visitTableExpression(*expr, data); + } + else if (const auto * function = ast->as()) + { + if (function->kind == ASTFunction::Kind::TABLE_ENGINE) + visitTableEngine(*function, data); + else + visitFunction(*function, data); + } } -bool DDLMatcherBase::needChildVisit(const ASTPtr & node, const ASTPtr & child) +bool DDLDependencyVisitor::needChildVisit(const ASTPtr &, const ASTPtr &) { - if (node->as()) - return false; - - if (auto * create = node->as()) - { - if (child.get() == create->select) - return false; - } - return true; } -ssize_t DDLMatcherBase::getPositionOfTableNameArgument(const ASTFunction & function) -{ - if (function.name == "joinGet" || - function.name == "dictHas" || - function.name == "dictIsIn" || - function.name.starts_with("dictGet")) - return 0; - - if (Poco::toLower(function.name) == "in") - return 1; - - return -1; -} - -void DDLDependencyVisitor::visit(const ASTFunction & function, Data & data) -{ - ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function); - if (table_name_arg_idx < 0) - return; - extractTableNameFromArgument(function, data, table_name_arg_idx); -} - -void DDLDependencyVisitor::visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data) -{ - if (dict_source.name != "clickhouse") - return; - if (!dict_source.elements) - return; - - auto config = getDictionaryConfigurationFromAST(data.create_query->as(), data.global_context); - auto info = getInfoIfClickHouseDictionarySource(config, data.global_context); - - if (!info || !info->is_local) - return; - - if (info->table_name.database.empty()) - info->table_name.database = data.default_database; - data.dependencies.emplace(std::move(info->table_name)); -} - -void DDLDependencyVisitor::visit(const ASTStorage & storage, Data & data) -{ - if (!storage.engine) - return; - if (storage.engine->name != "Dictionary") - return; - - extractTableNameFromArgument(*storage.engine, data, 0); -} - - -void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx) -{ - /// Just ignore incorrect arguments, proper exception will be thrown later - if (!function.arguments || function.arguments->children.size() <= arg_idx) - return; - - QualifiedTableName qualified_name; - - const auto * arg = function.arguments->as()->children[arg_idx].get(); - if (const auto * literal = arg->as()) - { - if (literal->value.getType() != Field::Types::String) - return; - - auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get()); - /// Just return if name if invalid - if (!maybe_qualified_name) - return; - - qualified_name = std::move(*maybe_qualified_name); - } - else if (const auto * identifier = dynamic_cast(arg)) - { - /// ASTIdentifier or ASTTableIdentifier - auto table_identifier = identifier->createTable(); - /// Just return if table identified is invalid - if (!table_identifier) - return; - - qualified_name.database = table_identifier->getDatabaseName(); - 
qualified_name.table = table_identifier->shortName(); - } - else - { - assert(false); - return; - } - - if (qualified_name.database.empty()) - { - /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here. - qualified_name.database = data.default_database; - } - data.dependencies.emplace(std::move(qualified_name)); -} - - -void NormalizeAndEvaluateConstants::visit(const ASTPtr & ast, Data & data) -{ - assert(data.create_query_context->hasQueryContext()); - - /// Looking for functions in column default expressions and dictionary source definition - if (const auto * function = ast->as()) - visit(*function, data); - else if (const auto * dict_source = ast->as()) - visit(*dict_source, data); -} - -void NormalizeAndEvaluateConstants::visit(const ASTFunction & function, Data & data) -{ - /// Replace expressions like "dictGet(currentDatabase() || '.dict', 'value', toUInt32(1))" - /// with "dictGet('db_name.dict', 'value', toUInt32(1))" - ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function); - if (table_name_arg_idx < 0) - return; - - if (!function.arguments || function.arguments->children.size() <= static_cast(table_name_arg_idx)) - return; - - auto & arg = function.arguments->as().children[table_name_arg_idx]; - if (arg->as()) - arg = evaluateConstantExpressionAsLiteral(arg, data.create_query_context); -} - - -void NormalizeAndEvaluateConstants::visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data) -{ - if (!dict_source.elements) - return; - - auto & expr_list = dict_source.elements->as(); - for (auto & child : expr_list.children) - { - ASTPair * pair = child->as(); - if (pair->second->as()) - { - auto ast_literal = evaluateConstantExpressionAsLiteral(pair->children[0], data.create_query_context); - pair->replace(pair->second, ast_literal); - } - } -} - } diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index d23a7a697a9..9709eeec9d3 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -1,72 +1,36 @@ #pragma once -#include + #include #include +#include + namespace DB { - -class ASTFunction; -class ASTFunctionWithKeyValueArguments; -class ASTStorage; - using TableNamesSet = std::unordered_set; -TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast); - - -class DDLMatcherBase -{ -public: - static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); - static ssize_t getPositionOfTableNameArgument(const ASTFunction & function); -}; - -/// Visits ASTCreateQuery and extracts names of table (or dictionary) dependencies -/// from column default expressions (joinGet, dictGet, etc) -/// or dictionary source (for dictionaries from local ClickHouse table). +/// Returns a list of all tables explicitly referenced in the create query of a specified table. +/// For example, a column default expression can use dictGet() and thus reference a dictionary. /// Does not validate AST, works a best-effort way. -class DDLDependencyVisitor : public DDLMatcherBase +TableNamesSet getDependenciesFromCreateQuery(const ContextPtr & global_context, const QualifiedTableName & table_name, const ASTPtr & ast); + +/// Visits ASTCreateQuery and extracts the names of all tables explicitly referenced in the create query. 
+class DDLDependencyVisitor { public: struct Data { - String default_database; - TableNamesSet dependencies; - ContextPtr global_context; ASTPtr create_query; + QualifiedTableName table_name; + String default_database; + ContextPtr global_context; + TableNamesSet dependencies; }; - using Visitor = ConstInDepthNodeVisitor; + using Visitor = ConstInDepthNodeVisitor; static void visit(const ASTPtr & ast, Data & data); - -private: - static void visit(const ASTFunction & function, Data & data); - static void visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data); - static void visit(const ASTStorage & storage, Data & data); - - static void extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx); + static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); }; -class NormalizeAndEvaluateConstants : public DDLMatcherBase -{ -public: - struct Data - { - ContextPtr create_query_context; - }; - - using Visitor = ConstInDepthNodeVisitor; - - static void visit(const ASTPtr & ast, Data & data); - -private: - static void visit(const ASTFunction & function, Data & data); - static void visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data); - -}; - -using NormalizeAndEvaluateConstantsVisitor = NormalizeAndEvaluateConstants::Visitor; - } diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp new file mode 100644 index 00000000000..8536d1c890d --- /dev/null +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -0,0 +1,152 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +using TableLoadingDependenciesVisitor = DDLLoadingDependencyVisitor::Visitor; + +TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast) +{ + assert(global_context == global_context->getGlobalContext()); + TableLoadingDependenciesVisitor::Data data; + data.default_database = global_context->getCurrentDatabase(); + data.create_query = ast; + data.global_context = global_context; + TableLoadingDependenciesVisitor visitor{data}; + visitor.visit(ast); + data.dependencies.erase(table); + return data.dependencies; +} + +void DDLLoadingDependencyVisitor::visit(const ASTPtr & ast, Data & data) +{ + /// Looking for functions in column default expressions and dictionary source definition + if (const auto * function = ast->as()) + visit(*function, data); + else if (const auto * dict_source = ast->as()) + visit(*dict_source, data); + else if (const auto * storage = ast->as()) + visit(*storage, data); +} + +bool DDLMatcherBase::needChildVisit(const ASTPtr & node, const ASTPtr & child) +{ + if (node->as()) + return false; + + if (auto * create = node->as()) + { + if (child.get() == create->select) + return false; + } + + return true; +} + +ssize_t DDLMatcherBase::getPositionOfTableNameArgument(const ASTFunction & function) +{ + if (function.name == "joinGet" || + function.name == "dictHas" || + function.name == "dictIsIn" || + function.name.starts_with("dictGet")) + return 0; + + if (Poco::toLower(function.name) == "in") + return 1; + + return -1; +} + +void DDLLoadingDependencyVisitor::visit(const ASTFunction & function, Data & data) +{ + ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function); + if (table_name_arg_idx < 0) + return; + extractTableNameFromArgument(function, data, table_name_arg_idx); +} + +void DDLLoadingDependencyVisitor::visit(const 
ASTFunctionWithKeyValueArguments & dict_source, Data & data) +{ +    if (dict_source.name != "clickhouse") +        return; +    if (!dict_source.elements) +        return; + +    auto config = getDictionaryConfigurationFromAST(data.create_query->as(), data.global_context); +    auto info = getInfoIfClickHouseDictionarySource(config, data.global_context); + +    if (!info || !info->is_local) +        return; + +    if (info->table_name.database.empty()) +        info->table_name.database = data.default_database; +    data.dependencies.emplace(std::move(info->table_name)); +} + +void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data) +{ +    if (!storage.engine) +        return; +    if (storage.engine->name != "Dictionary") +        return; + +    extractTableNameFromArgument(*storage.engine, data, 0); +} + + +void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx) +{ +    /// Just ignore incorrect arguments, proper exception will be thrown later +    if (!function.arguments || function.arguments->children.size() <= arg_idx) +        return; + +    QualifiedTableName qualified_name; + +    const auto * arg = function.arguments->as()->children[arg_idx].get(); +    if (const auto * literal = arg->as()) +    { +        if (literal->value.getType() != Field::Types::String) +            return; + +        auto maybe_qualified_name = QualifiedTableName::tryParseFromString(literal->value.get()); +        /// Just return if the name is invalid +        if (!maybe_qualified_name) +            return; + +        qualified_name = std::move(*maybe_qualified_name); +    } +    else if (const auto * identifier = dynamic_cast(arg)) +    { +        /// ASTIdentifier or ASTTableIdentifier +        auto table_identifier = identifier->createTable(); +        /// Just return if the table identifier is invalid +        if (!table_identifier) +            return; + +        qualified_name.database = table_identifier->getDatabaseName(); +        qualified_name.table = table_identifier->shortName(); +    } +    else +    { +        assert(false); +        return; +    } + +    if (qualified_name.database.empty()) +    { +        /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here. +        qualified_name.database = data.default_database; +    } +    data.dependencies.emplace(std::move(qualified_name)); +} + +} diff --git a/src/Databases/DDLLoadingDependencyVisitor.h b/src/Databases/DDLLoadingDependencyVisitor.h new file mode 100644 index 00000000000..f987e885266 --- /dev/null +++ b/src/Databases/DDLLoadingDependencyVisitor.h @@ -0,0 +1,54 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ + +class ASTFunction; +class ASTFunctionWithKeyValueArguments; +class ASTStorage; + +using TableNamesSet = std::unordered_set; + +/// Returns a list of all tables which should be loaded before a specified table. +/// For example, a local ClickHouse table should be loaded before a dictionary which uses that table as its source. +/// Does not validate AST, works a best-effort way. +TableNamesSet getLoadingDependenciesFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast); + + +class DDLMatcherBase +{ +public: +    static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); +    static ssize_t getPositionOfTableNameArgument(const ASTFunction & function); +}; + +/// Visits ASTCreateQuery and extracts the names of all tables which should be loaded before a specified table. +/// TODO: Combine this class with DDLDependencyVisitor (because loading dependencies are a subset of referential dependencies).
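// A minimal standalone sketch of the argument-position rule implemented by
// DDLMatcherBase::getPositionOfTableNameArgument() above: dictGet*, dictHas, dictIsIn and
// joinGet carry the table or dictionary name in argument 0, while IN carries it in
// argument 1. Plain strings stand in for the real ASTFunction and argument types.
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

long positionOfTableNameArgument(const std::string & function_name)
{
    if (function_name == "joinGet" || function_name == "dictHas"
        || function_name == "dictIsIn" || function_name.starts_with("dictGet"))
        return 0;

    std::string lowered = function_name;
    std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](unsigned char c) { return std::tolower(c); });
    if (lowered == "in")
        return 1;

    return -1;   // this function does not reference a table by name
}

int main()
{
    std::vector<std::pair<std::string, std::vector<std::string>>> calls = {
        {"dictGetString", {"db.dict", "value", "key"}},
        {"joinGet", {"db.join_table", "col", "key"}},
        {"IN", {"x", "db.set_source"}},
        {"plus", {"x", "1"}}};

    for (const auto & [name, args] : calls)
    {
        long pos = positionOfTableNameArgument(name);
        if (pos < 0 || static_cast<std::size_t>(pos) >= args.size())
            std::cout << name << " -> no table reference\n";
        else
            std::cout << name << " -> " << args[pos] << '\n';
    }
}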
+class DDLLoadingDependencyVisitor : public DDLMatcherBase +{ +public: + struct Data + { + String default_database; + TableNamesSet dependencies; + ContextPtr global_context; + ASTPtr create_query; + }; + + using Visitor = ConstInDepthNodeVisitor; + + static void visit(const ASTPtr & ast, Data & data); + +private: + static void visit(const ASTFunction & function, Data & data); + static void visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data); + static void visit(const ASTStorage & storage, Data & data); + + static void extractTableNameFromArgument(const ASTFunction & function, Data & data, size_t arg_idx); +}; + +} diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 99d88597385..39295bf499a 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include @@ -142,8 +142,9 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot alter: There is no metadata of table {}", table_id.getNameForLogs()); applyMetadataChangesToCreateQuery(it->second, metadata); - TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); - DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies)); + + auto new_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); + DatabaseCatalog::instance().updateDependencies(table_id, new_dependencies); } std::vector> DatabaseMemory::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 017199fe44a..01c6e5c8d8c 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -205,21 +205,9 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables } QualifiedTableName qualified_name{TSA_SUPPRESS_WARNING_FOR_READ(database_name), create_query->getTable()}; - TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext(), qualified_name, ast); std::lock_guard lock{metadata.mutex}; metadata.parsed_tables[qualified_name] = ParsedTableMetadata{full_path.string(), ast}; - if (loading_dependencies.empty()) - { - metadata.independent_database_objects.emplace_back(std::move(qualified_name)); - } - else - { - for (const auto & dependency : loading_dependencies) - metadata.dependencies_info[dependency].dependent_database_objects.insert(qualified_name); - assert(metadata.dependencies_info[qualified_name].dependencies.empty()); - metadata.dependencies_info[qualified_name].dependencies = std::move(loading_dependencies); - } metadata.total_dictionaries += create_query->is_dictionary; } } @@ -321,8 +309,8 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta out.close(); } - TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); - DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies)); + auto new_dependencies = getLoadingDependenciesFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); + 
DatabaseCatalog::instance().updateDependencies(table_id, new_dependencies); commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, local_context); } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index c0bc9d3f3a2..2a9f06e77fc 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -702,7 +702,18 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep /// We will drop or move tables which exist only in local metadata Strings tables_to_detach; - std::vector> replicated_tables_to_rename; + + struct RenameEdge + { + String from; + String intermediate; + String to; + }; + + /// This is needed to generate intermediate name + String salt = toString(thread_local_rng()); + + std::vector replicated_tables_to_rename; size_t total_tables = 0; std::vector replicated_ids; for (auto existing_tables_it = getTablesIterator(getContext(), {}); existing_tables_it->isValid(); @@ -719,8 +730,15 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep { if (name != it->second) { + String intermediate_name; + /// Possibly we failed to rename it on previous iteration + /// And this table was already renamed to an intermediate name + if (startsWith(name, ".rename-") && !startsWith(it->second, ".rename-")) + intermediate_name = name; + else + intermediate_name = fmt::format(".rename-{}-{}", name, sipHash64(fmt::format("{}-{}", name, salt))); /// Need just update table name - replicated_tables_to_rename.emplace_back(name, it->second); + replicated_tables_to_rename.push_back({name, intermediate_name, it->second}); } continue; } @@ -840,13 +858,13 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep tables_to_detach.size(), dropped_dictionaries, dropped_tables.size() - dropped_dictionaries, moved_tables); /// Now database is cleared from outdated tables, let's rename ReplicatedMergeTree tables to actual names - for (const auto & old_to_new : replicated_tables_to_rename) + /// We have to take into account that tables names could be changed with two general queries + /// 1) RENAME TABLE. There could be multiple pairs of tables (e.g. RENAME b TO c, a TO b, c TO d) + /// But it is equal to multiple subsequent RENAMEs each of which operates only with two tables + /// 2) EXCHANGE TABLE. This query swaps two names atomically and could not be represented with two separate RENAMEs + auto rename_table = [&](String from, String to) { - const String & from = old_to_new.first; - const String & to = old_to_new.second; - LOG_DEBUG(log, "Will RENAME TABLE {} TO {}", backQuoteIfNeed(from), backQuoteIfNeed(to)); - /// TODO Maybe we should do it in two steps: rename all tables to temporary names and then rename them to actual names? DDLGuardPtr table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::min(from, to)); DDLGuardPtr to_table_guard = DatabaseCatalog::instance().getDDLGuard(db_name, std::max(from, to)); @@ -858,7 +876,23 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep DatabaseAtomic::renameTable(make_query_context(), from, *this, to, false, false); tables_metadata_digest = new_digest; assert(checkDigestValid(getContext())); + }; + + LOG_DEBUG(log, "Starting first stage of renaming process. 
Will rename tables to intermediate names"); +    for (auto & [from, intermediate, _] : replicated_tables_to_rename) +    { +        /// Due to some unknown failures there could be tables +        /// which are already in an intermediate state +        /// For them we skip the first stage +        if (from == intermediate) +            continue; +        rename_table(from, intermediate); } +    LOG_DEBUG(log, "Starting second stage of renaming process. Will rename tables from intermediate to desired names"); +    for (auto & [_, intermediate, to] : replicated_tables_to_rename) +        rename_table(intermediate, to); + +    LOG_DEBUG(log, "Renames completed successfully"); for (const auto & id : dropped_tables) DatabaseCatalog::instance().waitTableFinallyDropped(id); diff --git a/src/Databases/NormalizeAndEvaluateConstantsVisitor.cpp b/src/Databases/NormalizeAndEvaluateConstantsVisitor.cpp new file mode 100644 index 00000000000..d9e494e7c9a --- /dev/null +++ b/src/Databases/NormalizeAndEvaluateConstantsVisitor.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +void NormalizeAndEvaluateConstants::visit(const ASTPtr & ast, Data & data) +{ +    assert(data.create_query_context->hasQueryContext()); + +    /// Looking for functions in column default expressions and dictionary source definition +    if (const auto * function = ast->as()) +        visit(*function, data); +    else if (const auto * dict_source = ast->as()) +        visit(*dict_source, data); +} + +void NormalizeAndEvaluateConstants::visit(const ASTFunction & function, Data & data) +{ +    /// Replace expressions like "dictGet(currentDatabase() || '.dict', 'value', toUInt32(1))" +    /// with "dictGet('db_name.dict', 'value', toUInt32(1))" +    ssize_t table_name_arg_idx = getPositionOfTableNameArgument(function); +    if (table_name_arg_idx < 0) +        return; + +    if (!function.arguments || function.arguments->children.size() <= static_cast(table_name_arg_idx)) +        return; + +    auto & arg = function.arguments->as().children[table_name_arg_idx]; +    if (arg->as()) +        arg = evaluateConstantExpressionAsLiteral(arg, data.create_query_context); +} + + +void NormalizeAndEvaluateConstants::visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data) +{ +    if (!dict_source.elements) +        return; + +    auto & expr_list = dict_source.elements->as(); +    for (auto & child : expr_list.children) +    { +        ASTPair * pair = child->as(); +        if (pair->second->as()) +        { +            auto ast_literal = evaluateConstantExpressionAsLiteral(pair->children[0], data.create_query_context); +            pair->replace(pair->second, ast_literal); +        } +    } +} + +} diff --git a/src/Databases/NormalizeAndEvaluateConstantsVisitor.h b/src/Databases/NormalizeAndEvaluateConstantsVisitor.h new file mode 100644 index 00000000000..bc51ddb0601 --- /dev/null +++ b/src/Databases/NormalizeAndEvaluateConstantsVisitor.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +/// Evaluates constants in a DDL query.
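// A self-contained sketch of the two-stage rename performed in
// DatabaseReplicated::recoverLostReplica() above: first move every table to a unique
// intermediate name, then move the intermediates to their desired names, so that rename
// chains and swaps (e.g. RENAME b TO c, a TO b, or an EXCHANGE of two names) never
// collide with a name that still exists. The std::map stands in for the catalog and the
// hash-based intermediate name only loosely mirrors the ".rename-..." scheme.
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct RenameEdge { std::string from, intermediate, to; };

int main()
{
    std::map<std::string, std::string> catalog = {{"a", "data of a"}, {"b", "data of b"}};

    // Desired final state: a -> b and b -> a (a swap, impossible as two direct renames).
    std::vector<RenameEdge> renames = {{"a", "", "b"}, {"b", "", "a"}};
    for (auto & r : renames)
        r.intermediate = ".rename-" + r.from + "-" + std::to_string(std::hash<std::string>{}(r.from + "-salt"));

    auto rename_table = [&](const std::string & from, const std::string & to)
    {
        auto node = catalog.extract(from);
        node.key() = to;
        catalog.insert(std::move(node));
    };

    for (const auto & r : renames)   // stage 1: free up the original names
        rename_table(r.from, r.intermediate);
    for (const auto & r : renames)   // stage 2: take the desired names
        rename_table(r.intermediate, r.to);

    for (const auto & [name, data] : catalog)
        std::cout << name << ": " << data << '\n';   // prints "a: data of b" and "b: data of a"
}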
+class NormalizeAndEvaluateConstants : public DDLMatcherBase +{ +public: + struct Data + { + ContextPtr create_query_context; + }; + + using Visitor = ConstInDepthNodeVisitor; + + static void visit(const ASTPtr & ast, Data & data); + +private: + static void visit(const ASTFunction & function, Data & data); + static void visit(const ASTFunctionWithKeyValueArguments & dict_source, Data & data); +}; + +using NormalizeAndEvaluateConstantsVisitor = NormalizeAndEvaluateConstants::Visitor; + +} diff --git a/src/Databases/TablesDependencyGraph.cpp b/src/Databases/TablesDependencyGraph.cpp new file mode 100644 index 00000000000..c4c361089ad --- /dev/null +++ b/src/Databases/TablesDependencyGraph.cpp @@ -0,0 +1,659 @@ +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INFINITE_LOOP; +} + + +namespace +{ + constexpr const size_t CYCLIC_LEVEL = static_cast(-2); +} + + +TablesDependencyGraph::TablesDependencyGraph(const String & name_for_logging_) + : name_for_logging(name_for_logging_) +{ +} + + +TablesDependencyGraph::TablesDependencyGraph(const TablesDependencyGraph & src) + : TablesDependencyGraph(src.name_for_logging) +{ + *this = src; +} + + +TablesDependencyGraph::TablesDependencyGraph(TablesDependencyGraph && src) noexcept + : TablesDependencyGraph(src.name_for_logging) +{ + *this = std::move(src); +} + + +TablesDependencyGraph & TablesDependencyGraph::operator=(const TablesDependencyGraph & src) +{ + if (&src != this) + { + nodes = src.nodes; + nodes_by_database_and_table_names = src.nodes_by_database_and_table_names; + nodes_by_uuid = src.nodes_by_uuid; + levels_calculated = src.levels_calculated; + nodes_sorted_by_level_lazy = src.nodes_sorted_by_level_lazy; + } + return *this; +} + + +TablesDependencyGraph & TablesDependencyGraph::operator=(TablesDependencyGraph && src) noexcept +{ + nodes = std::exchange(src.nodes, decltype(nodes){}); + nodes_by_database_and_table_names = std::exchange(src.nodes_by_database_and_table_names, decltype(nodes_by_database_and_table_names){}); + nodes_by_uuid = std::exchange(src.nodes_by_uuid, decltype(nodes_by_uuid){}); + levels_calculated = std::exchange(src.levels_calculated, false); + nodes_sorted_by_level_lazy = std::exchange(src.nodes_sorted_by_level_lazy, decltype(nodes_sorted_by_level_lazy){}); + return *this; +} + + +void TablesDependencyGraph::clear() +{ + nodes.clear(); + nodes_by_database_and_table_names.clear(); + nodes_by_uuid.clear(); + setNeedRecalculateLevels(); +} + + +bool TablesDependencyGraph::empty() const +{ + return nodes.empty(); +} + + +size_t TablesDependencyGraph::getNumberOfTables() const +{ + return nodes.size(); +} + + +void TablesDependencyGraph::addDependency(const StorageID & table_id, const StorageID & dependency) +{ + auto * table_node = addOrUpdateNode(table_id); + auto * dependency_node = addOrUpdateNode(dependency); + + if (table_node->dependencies.contains(dependency_node)) + return; /// Already have this dependency. 
+ + table_node->dependencies.insert(dependency_node); + dependency_node->dependents.insert(table_node); + + setNeedRecalculateLevels(); +} + + +void TablesDependencyGraph::addDependencies(const StorageID & table_id, const std::vector & dependencies) +{ + auto * table_node = addOrUpdateNode(table_id); + + std::unordered_set new_dependency_nodes; + for (const auto & dependency : dependencies) + new_dependency_nodes.emplace(addOrUpdateNode(dependency)); + + if (table_node->dependencies == new_dependency_nodes) + return; + + auto old_dependencies = getDependencies(*table_node); + auto old_dependency_nodes = std::move(table_node->dependencies); + + if (!old_dependencies.empty()) + { + LOG_WARNING( + getLogger(), + "Replacing outdated dependencies ({}) of {} with: {}", + fmt::join(old_dependencies, ", "), + table_id, + fmt::join(dependencies, ", ")); + } + + for (auto * dependency_node : old_dependency_nodes) + { + if (!new_dependency_nodes.contains(dependency_node)) + dependency_node->dependents.erase(table_node); + } + + for (auto * dependency_node : new_dependency_nodes) + { + if (!old_dependency_nodes.contains(dependency_node)) + dependency_node->dependents.insert(table_node); + } + + table_node->dependencies = std::move(new_dependency_nodes); + setNeedRecalculateLevels(); +} + + +void TablesDependencyGraph::addDependencies(const StorageID & table_id, const TableNamesSet & dependencies) +{ + std::vector converted_dependencies; + for (const auto & dependency : dependencies) + converted_dependencies.emplace_back(StorageID{dependency}); + addDependencies(table_id, converted_dependencies); +} + + +void TablesDependencyGraph::addDependencies(const QualifiedTableName & table_name, const TableNamesSet & dependencies) +{ + addDependencies(StorageID{table_name}, dependencies); +} + + +bool TablesDependencyGraph::removeDependency(const StorageID & table_id, const StorageID & dependency, bool remove_isolated_tables) +{ + auto * table_node = findNode(table_id); + if (!table_node) + return false; + + auto * dependency_node = findNode(dependency); + if (!dependency_node) + return false; + + auto dependency_it = table_node->dependencies.find(dependency_node); + if (dependency_it == table_node->dependencies.end()) + return false; + + table_node->dependencies.erase(dependency_it); + dependency_node->dependents.erase(table_node); + bool table_node_removed = false; + + if (remove_isolated_tables && dependency_node->dependencies.empty() && dependency_node->dependents.empty()) + { + removeNode(dependency_node); + if (table_node == dependency_node) + table_node_removed = true; + } + + if (remove_isolated_tables && !table_node_removed && table_node->dependencies.empty() && table_node->dependents.empty()) + removeNode(table_node); + + setNeedRecalculateLevels(); + return true; +} + + +std::vector TablesDependencyGraph::removeDependencies(const StorageID & table_id, bool remove_isolated_tables) +{ + auto * table_node = findNode(table_id); + if (!table_node) + return {}; + + auto dependency_nodes = std::move(table_node->dependencies); + table_node->dependencies.clear(); + bool table_node_removed = false; + + std::vector dependencies; + dependencies.reserve(dependency_nodes.size()); + + for (auto * dependency_node : dependency_nodes) + { + dependencies.emplace_back(dependency_node->storage_id); + dependency_node->dependents.erase(table_node); + + if (remove_isolated_tables && dependency_node->dependencies.empty() && dependency_node->dependents.empty()) + { + removeNode(dependency_node); + if (table_node == 
dependency_node) + table_node_removed = true; + } + } + + if (remove_isolated_tables && !table_node_removed && table_node->dependencies.empty() && table_node->dependents.empty()) + removeNode(table_node); + + setNeedRecalculateLevels(); + return dependencies; +} + + +bool TablesDependencyGraph::removeTable(const StorageID & table_id) +{ + auto * table_node = findNode(table_id); + if (!table_node) + return false; + + removeNode(table_node); + + setNeedRecalculateLevels(); + return true; +} + + +TablesDependencyGraph::Node * TablesDependencyGraph::findNode(const StorageID & table_id) const +{ + table_id.assertNotEmpty(); + if (table_id.hasUUID()) + { + auto it = nodes_by_uuid.find(table_id.uuid); + if (it != nodes_by_uuid.end()) + return it->second; /// Found by UUID. + } + if (!table_id.table_name.empty()) + { + auto it = nodes_by_database_and_table_names.find(table_id); + if (it != nodes_by_database_and_table_names.end()) + { + auto * node = it->second; + if (table_id.hasUUID() && node->storage_id.hasUUID() && (table_id.uuid != node->storage_id.uuid)) + return nullptr; /// UUID is different, it's not the node we're looking for. + return node; /// Found by table name. + } + } + return nullptr; /// Not found. +} + + +TablesDependencyGraph::Node * TablesDependencyGraph::addOrUpdateNode(const StorageID & table_id) +{ + auto * node = findNode(table_id); + if (node) + { + /// Node has been found, maybe we can update the information in the graph with new table_name or new UUID. + if (table_id.hasUUID() && !node->storage_id.hasUUID()) + { + node->storage_id.uuid = table_id.uuid; + nodes_by_uuid.emplace(node->storage_id.uuid, node); + } + + if (!table_id.table_name.empty() && ((table_id.table_name != node->storage_id.table_name) || (table_id.database_name != node->storage_id.database_name))) + { + auto it = nodes_by_database_and_table_names.find(table_id); + if (it != nodes_by_database_and_table_names.end()) + { + LOG_WARNING(getLogger(), "Name conflict in the graph having tables {} and {} while adding table {}. Will remove {} from the graph", + node->storage_id, it->second->storage_id, table_id, it->second->storage_id); + removeNode(it->second); + } + nodes_by_database_and_table_names.erase(node->storage_id); + node->storage_id.database_name = table_id.database_name; + node->storage_id.table_name = table_id.table_name; + nodes_by_database_and_table_names.emplace(node->storage_id, node); + } + } + else + { + /// Node has not been found by UUID or table name. + if (!table_id.table_name.empty()) + { + auto it = nodes_by_database_and_table_names.find(table_id); + if (it != nodes_by_database_and_table_names.end()) + { + LOG_WARNING(getLogger(), "Name conflict in the graph having table {} while adding table {}. 
Will remove {} from the graph", + it->second->storage_id, table_id, it->second->storage_id); + removeNode(it->second); + } + } + auto node_ptr = std::make_shared(table_id); + nodes.insert(node_ptr); + node = node_ptr.get(); + if (table_id.hasUUID()) + nodes_by_uuid.emplace(table_id.uuid, node); + if (!table_id.table_name.empty()) + nodes_by_database_and_table_names.emplace(table_id, node); + } + return node; +} + + +void TablesDependencyGraph::removeNode(Node * node) +{ + auto dependency_nodes = std::move(node->dependencies); + auto dependent_nodes = std::move(node->dependents); + + if (node->storage_id.hasUUID()) + nodes_by_uuid.erase(node->storage_id.uuid); + + if (!node->storage_id.table_name.empty()) + nodes_by_database_and_table_names.erase(node->storage_id); + + for (auto * dependency_node : dependency_nodes) + dependency_node->dependents.erase(node); + + for (auto * dependent_node : dependent_nodes) + dependent_node->dependencies.erase(node); + + nodes.erase(node->shared_from_this()); +} + + +size_t TablesDependencyGraph::removeTablesIf(const std::function & function) +{ + size_t num_removed = 0; + + auto it = nodes.begin(); + while (it != nodes.end()) + { + auto * current = (it++)->get(); + if (function(current->storage_id)) + { + StorageID storage_id = current->storage_id; + removeNode(current); + ++num_removed; + } + } + + if (num_removed) + setNeedRecalculateLevels(); + + return num_removed; +} + + +size_t TablesDependencyGraph::removeIsolatedTables() +{ + size_t num_removed = 0; + auto it = nodes.begin(); + while (it != nodes.end()) + { + auto * current = (it++)->get(); + if (current->dependencies.empty() && current->dependents.empty()) + { + removeNode(current); + ++num_removed; + } + } + + if (num_removed) + setNeedRecalculateLevels(); + + return num_removed; +} + + +std::vector TablesDependencyGraph::getTables() const +{ + std::vector res; + res.reserve(nodes.size()); + for (const auto & node : nodes) + res.emplace_back(node->storage_id); + return res; +} + + +void TablesDependencyGraph::mergeWith(const TablesDependencyGraph & other) +{ + for (const auto & other_node : other.nodes) + addDependencies(other_node->storage_id, other.getDependencies(*other_node)); +} + + +std::vector TablesDependencyGraph::getDependencies(const StorageID & table_id) const +{ + const auto * node = findNode(table_id); + if (!node) + return {}; + return getDependencies(*node); +} + + +std::vector TablesDependencyGraph::getDependencies(const Node & node) +{ + std::vector res; + res.reserve(node.dependencies.size()); + for (const auto * dependency_node : node.dependencies) + res.emplace_back(dependency_node->storage_id); + return res; +} + +size_t TablesDependencyGraph::getNumberOfDependencies(const StorageID & table_id) const +{ + const auto * node = findNode(table_id); + if (!node) + return 0; + return node->dependencies.size(); +} + + +std::vector TablesDependencyGraph::getDependents(const StorageID & table_id) const +{ + const auto * node = findNode(table_id); + if (!node) + return {}; + return getDependents(*node); +} + + +std::vector TablesDependencyGraph::getDependents(const Node & node) +{ + std::vector res; + res.reserve(node.dependents.size()); + for (const auto * dependent_node : node.dependents) + res.emplace_back(dependent_node->storage_id); + return res; +} + + +size_t TablesDependencyGraph::getNumberOfDependents(const StorageID & table_id) const +{ + const auto * node = findNode(table_id); + if (!node) + return 0; + return node->dependents.size(); +} + + +void 
TablesDependencyGraph::getNumberOfAdjacents(const StorageID & table_id, size_t & num_dependencies, size_t & num_dependents) const +{ + num_dependencies = 0; + num_dependents = 0; + + const auto * node = findNode(table_id); + if (!node) + return; + + num_dependencies = node->dependencies.size(); + num_dependents = node->dependents.size(); +} + + +bool TablesDependencyGraph::isIsolatedTable(const StorageID & table_id) const +{ + const auto * node = findNode(table_id); + if (!node) + return false; + + return node->dependencies.empty() && node->dependents.empty(); +} + + +void TablesDependencyGraph::checkNoCyclicDependencies() const +{ + if (hasCyclicDependencies()) + { + throw Exception( + ErrorCodes::INFINITE_LOOP, + "{}: Tables {} have cyclic dependencies: {}", + name_for_logging, + fmt::join(getTablesWithCyclicDependencies(), ", "), + describeCyclicDependencies()); + } +} + + +bool TablesDependencyGraph::hasCyclicDependencies() const +{ + const auto & nodes_sorted_by_level = getNodesSortedByLevel(); + return !nodes_sorted_by_level.empty() && (nodes_sorted_by_level.back()->level == CYCLIC_LEVEL); +} + + +std::vector TablesDependencyGraph::getTablesWithCyclicDependencies() const +{ + std::vector res; + for (const auto * node : getNodesSortedByLevel() | boost::adaptors::reversed) + { + if (node->level != CYCLIC_LEVEL) + break; + res.emplace_back(node->storage_id); + } + return res; +} + + +String TablesDependencyGraph::describeCyclicDependencies() const +{ + String res; + for (const auto * node : getNodesSortedByLevel() | boost::adaptors::reversed) + { + if (node->level != CYCLIC_LEVEL) + break; + if (!res.empty()) + res += "; "; + res += node->storage_id.getNameForLogs(); + res += " -> ["; + bool need_comma = false; + for (const auto * dependency_node : node->dependencies) + { + if (dependency_node->level != CYCLIC_LEVEL) + continue; + if (need_comma) + res += ", "; + need_comma = true; + res += dependency_node->storage_id.getNameForLogs(); + } + res += "]"; + } + return res; +} + + +void TablesDependencyGraph::setNeedRecalculateLevels() +{ + levels_calculated = false; + nodes_sorted_by_level_lazy.clear(); +} + + +void TablesDependencyGraph::calculateLevels() const +{ + if (levels_calculated) + return; + levels_calculated = true; + + nodes_sorted_by_level_lazy.clear(); + nodes_sorted_by_level_lazy.reserve(nodes.size()); + + std::unordered_set nodes_to_process; + for (const auto & node_ptr : nodes) + nodes_to_process.emplace(node_ptr.get()); + + size_t current_level = 0; + + while (!nodes_to_process.empty()) + { + size_t old_num_sorted = nodes_sorted_by_level_lazy.size(); + + for (auto it = nodes_to_process.begin(); it != nodes_to_process.end();) + { + const auto * current_node = *(it++); + bool has_dependencies = false; + for (const auto * dependency : current_node->dependencies) + { + if (nodes_to_process.contains(dependency)) + has_dependencies = true; + } + + if (!has_dependencies) + { + current_node->level = current_level; + nodes_sorted_by_level_lazy.emplace_back(current_node); + } + } + + if (nodes_sorted_by_level_lazy.size() == old_num_sorted) + break; + + for (size_t i = old_num_sorted; i != nodes_sorted_by_level_lazy.size(); ++i) + nodes_to_process.erase(nodes_sorted_by_level_lazy[i]); + + ++current_level; + } + + for (const auto * node_with_cyclic_dependencies : nodes_to_process) + { + node_with_cyclic_dependencies->level = CYCLIC_LEVEL; + nodes_sorted_by_level_lazy.emplace_back(node_with_cyclic_dependencies); + } +} + + +const TablesDependencyGraph::NodesSortedByLevel & 
TablesDependencyGraph::getNodesSortedByLevel() const +{ + calculateLevels(); + return nodes_sorted_by_level_lazy; +} + + +std::vector TablesDependencyGraph::getTablesSortedByDependency() const +{ + std::vector res; + res.reserve(nodes.size()); + for (const auto * node : getNodesSortedByLevel()) + { + res.emplace_back(node->storage_id); + } + return res; +} + + +std::vector> TablesDependencyGraph::getTablesSortedByDependencyForParallel() const +{ + std::vector> res; + std::optional last_level; + for (const auto * node : getNodesSortedByLevel()) + { + if (node->level != last_level) + res.emplace_back(); + auto & table_ids = res.back(); + table_ids.emplace_back(node->storage_id); + last_level = node->level; + } + return res; +} + + +void TablesDependencyGraph::log() const +{ + if (empty()) + { + LOG_TEST(getLogger(), "No tables"); + return; + } + + for (const auto * node : getNodesSortedByLevel()) + { + String dependencies_desc = node->dependencies.empty() + ? "no dependencies" + : fmt::format("{} dependencies: {}", node->dependencies.size(), fmt::join(getDependencies(*node), ", ")); + + String level_desc = (node->level == CYCLIC_LEVEL) ? "cyclic" : fmt::format("level {}", node->level); + + LOG_TEST(getLogger(), "Table {} has {} ({})", node->storage_id, dependencies_desc, level_desc); + } +} + + +Poco::Logger * TablesDependencyGraph::getLogger() const +{ + if (!logger) + logger = &Poco::Logger::get(name_for_logging); + return logger; +} + +} diff --git a/src/Databases/TablesDependencyGraph.h b/src/Databases/TablesDependencyGraph.h new file mode 100644 index 00000000000..0d60857dea8 --- /dev/null +++ b/src/Databases/TablesDependencyGraph.h @@ -0,0 +1,171 @@ +#pragma once + +#include + +#include +#include + + +namespace DB +{ +using TableNamesSet = std::unordered_set; + +/// Represents dependencies of some tables on other tables or dictionaries. +/// +/// NOTES: A "dependent" depends on its "dependency". For example, if table "A" depends on table "B", then +/// "B" is a dependency for "A", and "A" is a dependent for "B". +/// +/// Dependencies can be added to the graph in any order. For example, if table "A" depends on "B", and "B" depends on "C", then +/// it's allowed to add first "A->B" and then "B->C", or first "B->C" and then "A->B", the resulting graph will be the same. +/// +/// This class is used to represent various types of table-table dependencies: +/// 1. View dependencies: "source_table -> materialized_view". +/// Data inserted to a source table is also inserted to corresponding materialized views. +/// 2. Loading dependencies: specify in which order tables must be loaded during startup. +/// For example a dictionary should be loaded after it's source table and it's written in the graph as "dictionary -> source_table". +/// 3. Referential dependencies: "table -> all tables mentioned in its definition". +/// Referential dependencies are checked to decide if it's safe to drop a table (it can be unsafe if the table is used by another table). +/// +/// WARNING: This class doesn't have an embedded mutex, so it must be synchronized outside. +class TablesDependencyGraph +{ +public: + explicit TablesDependencyGraph(const String & name_for_logging_); + + TablesDependencyGraph(const TablesDependencyGraph & src); + TablesDependencyGraph(TablesDependencyGraph && src) noexcept; + TablesDependencyGraph & operator=(const TablesDependencyGraph & src); + TablesDependencyGraph & operator=(TablesDependencyGraph && src) noexcept; + + /// The dependency graph is empty if doesn't contain any tables. 
+    bool empty() const; + +    /// Clears this dependency graph. +    void clear(); + +    /// Adds a single dependency "table_id" on "dependency". +    void addDependency(const StorageID & table_id, const StorageID & dependency); + +    /// Adds a table with specified dependencies if there are no dependencies of the table in the graph yet; +    /// otherwise it replaces the dependencies of the table in the graph and shows a warning. +    void addDependencies(const StorageID & table_id, const std::vector & dependencies); +    void addDependencies(const StorageID & table_id, const TableNamesSet & dependencies); +    void addDependencies(const QualifiedTableName & table_name, const TableNamesSet & dependencies); + +    /// Removes a single dependency of "table_id" on "dependency". +    /// If "remove_isolated_tables" is set, the function will also remove tables with no dependencies and no dependents +    /// from the graph. +    bool removeDependency(const StorageID & table_id, const StorageID & dependency, bool remove_isolated_tables = false); + +    /// Removes all dependencies of "table_id", returns those dependencies. +    std::vector removeDependencies(const StorageID & table_id, bool remove_isolated_tables = false); + +    /// Removes a table from the graph and removes all references to it from the graph (both from its dependencies and dependents). +    bool removeTable(const StorageID & table_id); + +    /// Removes tables from the graph by a specified filter. +    size_t removeTablesIf(const std::function & function); + +    /// Removes tables with no dependencies and no dependents from the graph. +    size_t removeIsolatedTables(); + +    /// Returns the number of tables in the graph. +    size_t getNumberOfTables() const; + +    /// Returns a list of all tables in the graph. +    std::vector getTables() const; + +    /// Adds the tables and dependencies from another graph. +    void mergeWith(const TablesDependencyGraph & other); + +    /// Returns a list of dependencies of a specified table. +    std::vector getDependencies(const StorageID & table_id) const; +    size_t getNumberOfDependencies(const StorageID & table_id) const; +    bool hasDependencies(const StorageID & table_id) const { return getNumberOfDependencies(table_id) != 0; } + +    /// Returns a list of dependents of a specified table. +    std::vector getDependents(const StorageID & table_id) const; +    size_t getNumberOfDependents(const StorageID & table_id) const; +    bool hasDependents(const StorageID & table_id) const { return getNumberOfDependents(table_id) != 0; } + +    /// Returns the number of dependencies and the number of dependents of a specified table. +    void getNumberOfAdjacents(const StorageID & table_id, size_t & num_dependencies, size_t & num_dependents) const; + +    /// Returns true if a specified table has no dependencies and no dependents. +    bool isIsolatedTable(const StorageID & table_id) const; + +    /// Checks that there are no cyclic dependencies in the graph. +    /// Cyclic dependencies are dependencies like "A->A" or "A->B->C->D->A". +    void checkNoCyclicDependencies() const; +    bool hasCyclicDependencies() const; +    std::vector getTablesWithCyclicDependencies() const; +    String describeCyclicDependencies() const; + +    /// Returns a list of tables sorted by their dependencies: +    /// tables without dependencies first, then +    /// tables which depend on the tables without dependencies, then +    /// tables which depend on the tables which depend on the tables without dependencies, and so on.
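// A self-contained sketch of the level-by-level ordering described above: tables with no
// unresolved dependencies get level 0, tables that depend only on level-0 tables get
// level 1, and so on; whatever is left at the end belongs to a cycle. Plain strings stand
// in for StorageID; only the shape of the algorithm is illustrated, not the real class.
#include <cstddef>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

int main()
{
    // table -> set of tables it depends on
    std::map<std::string, std::set<std::string>> deps = {
        {"db.source", {}},
        {"db.dict", {"db.source"}},
        {"db.wide", {"db.dict", "db.source"}},
        {"db.x", {"db.y"}}, {"db.y", {"db.x"}}};   // a cycle

    std::set<std::string> remaining;
    for (const auto & [table, _] : deps)
        remaining.insert(table);

    std::vector<std::pair<std::string, std::size_t>> sorted;   // (table, level)
    for (std::size_t level = 0; !remaining.empty(); ++level)
    {
        std::vector<std::string> ready;
        for (const auto & table : remaining)
        {
            bool has_unresolved = false;
            for (const auto & dependency : deps[table])
                if (remaining.count(dependency))
                    has_unresolved = true;
            if (!has_unresolved)
                ready.push_back(table);
        }
        if (ready.empty())
            break;   // everything still remaining is part of a cycle
        for (const auto & table : ready)
        {
            sorted.emplace_back(table, level);
            remaining.erase(table);
        }
    }

    for (const auto & [table, level] : sorted)
        std::cout << table << " at level " << level << '\n';   // db.source: 0, db.dict: 1, db.wide: 2
    for (const auto & table : remaining)
        std::cout << table << " is part of a cycle\n";          // db.x, db.y
}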
+ std::vector getTablesSortedByDependency() const; + + /// The same as getTablesSortedByDependency() but make a list for parallel processing. + std::vector> getTablesSortedByDependencyForParallel() const; + + /// Outputs information about this graph as a bunch of logging messages. + void log() const; + +private: + struct Node : public std::enable_shared_from_this + { + StorageID storage_id; + + /// If A depends on B then "A.dependencies" contains "B". + std::unordered_set dependencies; + + /// If A depends on B then "B.dependents" contains "A". + std::unordered_set dependents; + + /// Tables without dependencies have level == 0, tables which depend on the tables without dependencies have level == 1, and so on. + /// Calculated lazily. + mutable size_t level = 0; + + explicit Node(const StorageID & storage_id_) : storage_id(storage_id_) {} + }; + + using NodeSharedPtr = std::shared_ptr; + + struct LessByLevel + { + bool operator()(const Node * left, const Node * right) { return left->level < right->level; } + }; + + std::unordered_set nodes; + + /// Nodes can be found either by UUID or by database name & table name. That's why we need two maps here. + std::unordered_map nodes_by_database_and_table_names; + std::unordered_map nodes_by_uuid; + + /// This is set if both `level` inside each node and `nodes_sorted_by_level_lazy` are calculated. + mutable bool levels_calculated = false; + + /// Nodes sorted by their level. Calculated lazily. + using NodesSortedByLevel = std::vector; + mutable NodesSortedByLevel nodes_sorted_by_level_lazy; + + const String name_for_logging; + mutable Poco::Logger * logger = nullptr; + + Node * findNode(const StorageID & table_id) const; + Node * addOrUpdateNode(const StorageID & table_id); + void removeNode(Node * node); + + static std::vector getDependencies(const Node & node); + static std::vector getDependents(const Node & node); + + void setNeedRecalculateLevels(); + void calculateLevels() const; + const NodesSortedByLevel & getNodesSortedByLevel() const; + + Poco::Logger * getLogger() const; +}; + +} diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 1114206d469..fbb5b1f17d9 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -14,45 +15,12 @@ namespace DB namespace ErrorCodes { - extern const int INFINITE_LOOP; extern const int LOGICAL_ERROR; } static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256; static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5; -void mergeDependenciesGraphs(DependenciesInfos & main_dependencies_info, const DependenciesInfos & additional_info) -{ - for (const auto & table_and_info : additional_info) - { - const QualifiedTableName & table = table_and_info.first; - const TableNamesSet & dependent_tables = table_and_info.second.dependent_database_objects; - const TableNamesSet & dependencies = table_and_info.second.dependencies; - - DependenciesInfo & maybe_existing_info = main_dependencies_info[table]; - maybe_existing_info.dependent_database_objects.insert(dependent_tables.begin(), dependent_tables.end()); - if (!dependencies.empty()) - { - if (maybe_existing_info.dependencies.empty()) - maybe_existing_info.dependencies = dependencies; - else if (maybe_existing_info.dependencies != dependencies) - { - /// Can happen on DatabaseReplicated recovery - LOG_WARNING(&Poco::Logger::get("TablesLoader"), "Replacing outdated dependencies ({}) of {} with: {}", - 
fmt::join(maybe_existing_info.dependencies, ", "), - table, - fmt::join(dependencies, ", ")); - for (const auto & old_dependency : maybe_existing_info.dependencies) - { - [[maybe_unused]] bool removed = main_dependencies_info[old_dependency].dependent_database_objects.erase(table); - assert(removed); - } - maybe_existing_info.dependencies = dependencies; - } - } - } -} - void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch) { if (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) @@ -66,6 +34,8 @@ TablesLoader::TablesLoader(ContextMutablePtr global_context_, Databases database : global_context(global_context_) , databases(std::move(databases_)) , strictness_mode(strictness_mode_) +, referential_dependencies("ReferentialDeps") +, loading_dependencies("LoadingDeps") { metadata.default_database = global_context->getCurrentDatabase(); log = &Poco::Logger::get("TablesLoader"); @@ -101,20 +71,18 @@ void TablesLoader::loadTables() stopwatch.restart(); - logDependencyGraph(); - - /// Remove tables that do not exist - removeUnresolvableDependencies(/* remove_loaded */ false); + buildDependencyGraph(); /// Update existing info (it's important for ATTACH DATABASE) - DatabaseCatalog::instance().addLoadingDependencies(metadata.dependencies_info); + DatabaseCatalog::instance().addDependencies(referential_dependencies); - /// Some tables were loaded by database with loadStoredObjects(...). Remove them from graph if necessary. - removeUnresolvableDependencies(/* remove_loaded */ true); + /// Remove tables that do not exist + removeUnresolvableDependencies(); loadTablesInTopologicalOrder(pool); } + void TablesLoader::startupTables() { /// Startup tables after all tables are loaded. Background tasks (merges, mutations, etc) may slow down data parts loading. @@ -123,52 +91,79 @@ void TablesLoader::startupTables() } -void TablesLoader::removeUnresolvableDependencies(bool remove_loaded) +void TablesLoader::buildDependencyGraph() { - auto need_exclude_dependency = [this, remove_loaded](const QualifiedTableName & dependency_name, const DependenciesInfo & info) + for (const auto & [table_name, table_metadata] : metadata.parsed_tables) + { + auto new_loading_dependencies = getLoadingDependenciesFromCreateQuery(global_context, table_name, table_metadata.ast); + + if (!new_loading_dependencies.empty()) + referential_dependencies.addDependencies(table_name, new_loading_dependencies); + + /// We're adding `new_loading_dependencies` to the graph here even if they're empty because + /// we need to have all tables from `metadata.parsed_tables` in the graph. + loading_dependencies.addDependencies(table_name, new_loading_dependencies); + } + + referential_dependencies.log(); + loading_dependencies.log(); +} + + +void TablesLoader::removeUnresolvableDependencies() +{ + auto need_exclude_dependency = [this](const StorageID & table_id) { /// Table exists and will be loaded - if (metadata.parsed_tables.contains(dependency_name)) + if (metadata.parsed_tables.contains(table_id.getQualifiedName())) return false; - /// Table exists and it's already loaded - if (DatabaseCatalog::instance().isTableExist(StorageID(dependency_name.database, dependency_name.table), global_context)) - return remove_loaded; - /// It's XML dictionary. 
- if (dependency_name.database == metadata.default_database && - global_context->getExternalDictionariesLoader().has(dependency_name.table)) + + if (DatabaseCatalog::instance().isTableExist(table_id, global_context)) { - LOG_WARNING(log, "Tables {} depend on XML dictionary {}, but XML dictionaries are loaded independently." - "Consider converting it to DDL dictionary.", fmt::join(info.dependent_database_objects, ", "), dependency_name); - return true; + /// Table exists and it's already loaded + } + else if (table_id.database_name == metadata.default_database && + global_context->getExternalDictionariesLoader().has(table_id.table_name)) + { + /// Tables depend on a XML dictionary. + LOG_WARNING( + log, + "Tables {} depend on XML dictionary {}, but XML dictionaries are loaded independently." + "Consider converting it to DDL dictionary.", + fmt::join(loading_dependencies.getDependents(table_id), ", "), + table_id); + } + else + { + /// Some tables depend on table "table_id", but there is no such table in DatabaseCatalog and we don't have its metadata. + /// We will ignore it and try to load dependent tables without "table_id" + /// (but most likely dependent tables will fail to load). + LOG_WARNING( + log, + "Tables {} depend on {}, but seems like that does not exist. Will ignore it and try to load existing tables", + fmt::join(loading_dependencies.getDependents(table_id), ", "), + table_id); } - /// Some tables depends on table "dependency_name", but there is no such table in DatabaseCatalog and we don't have its metadata. - /// We will ignore it and try to load dependent tables without "dependency_name" - /// (but most likely dependent tables will fail to load). - LOG_WARNING(log, "Tables {} depend on {}, but seems like the it does not exist. Will ignore it and try to load existing tables", - fmt::join(info.dependent_database_objects, ", "), dependency_name); - - if (!info.dependencies.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} does not exist, but we have seen its AST and found {} dependencies." - "It's a bug", dependency_name, info.dependencies.size()); - if (info.dependent_database_objects.empty()) + size_t num_dependencies, num_dependents; + loading_dependencies.getNumberOfAdjacents(table_id, num_dependencies, num_dependents); + if (num_dependencies || !num_dependents) throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} does not have dependencies and dependent tables as it expected to." - "It's a bug", dependency_name); + "It's a bug", table_id); - return true; + return true; /// Exclude this dependency. }; - auto table_it = metadata.dependencies_info.begin(); - while (table_it != metadata.dependencies_info.end()) - { - auto & info = table_it->second; - if (need_exclude_dependency(table_it->first, info)) - table_it = removeResolvedDependency(table_it, metadata.independent_database_objects); - else - ++table_it; - } + loading_dependencies.removeTablesIf(need_exclude_dependency); + + if (loading_dependencies.getNumberOfTables() != metadata.parsed_tables.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of tables to be loaded is not as expected. It's a bug"); + + /// Cannot load tables with cyclic dependencies. 
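// A short self-contained sketch of the per-level batches that
// getTablesSortedByDependencyForParallel() hands to the loader below: tables on the same
// dependency level have no edges between them, so each batch can be loaded in parallel
// before moving on to the next level. Strings and pairs stand in for StorageID and the
// real graph; scheduling on the thread pool is only hinted at in a comment.
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main()
{
    // (table, level), already sorted by level as the dependency graph would return it.
    std::vector<std::pair<std::string, std::size_t>> sorted = {
        {"db.source_a", 0}, {"db.source_b", 0}, {"db.dict", 1}, {"db.wide", 2}};

    std::vector<std::vector<std::string>> batches;
    std::size_t last_level = static_cast<std::size_t>(-1);
    for (const auto & [table, level] : sorted)
    {
        if (batches.empty() || level != last_level)
            batches.emplace_back();
        batches.back().push_back(table);
        last_level = level;
    }

    for (std::size_t i = 0; i != batches.size(); ++i)
    {
        std::cout << "batch " << i << ":";
        for (const auto & table : batches[i])
            std::cout << ' ' << table;   // each table in this batch could be scheduled on the pool
        std::cout << '\n';
    }
}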
+ loading_dependencies.checkNoCyclicDependencies(); } + void TablesLoader::loadTablesInTopologicalOrder(ThreadPool & pool) { /// Compatibility setting which should be enabled by default on attach @@ -176,81 +171,25 @@ void TablesLoader::loadTablesInTopologicalOrder(ThreadPool & pool) ContextMutablePtr load_context = Context::createCopy(global_context); load_context->setSetting("cast_ipv4_ipv6_default_on_conversion_error", 1); - /// Load independent tables in parallel. - /// Then remove loaded tables from dependency graph, find tables/dictionaries that do not have unresolved dependencies anymore, - /// move them to the list of independent tables and load. - /// Repeat until we have some tables to load. - /// If we do not, then either all objects are loaded or there is cyclic dependency. - /// Complexity: O(V + E) - size_t level = 0; - do + /// Load tables in parallel. + auto tables_to_load = loading_dependencies.getTablesSortedByDependencyForParallel(); + + for (size_t level = 0; level != tables_to_load.size(); ++level) { - assert(metadata.parsed_tables.size() == tables_processed + metadata.independent_database_objects.size() + getNumberOfTablesWithDependencies()); - logDependencyGraph(); - - startLoadingIndependentTables(pool, level, load_context); - - TableNames new_independent_database_objects; - for (const auto & table_name : metadata.independent_database_objects) - { - auto info_it = metadata.dependencies_info.find(table_name); - if (info_it == metadata.dependencies_info.end()) - { - /// No tables depend on table_name and it was not even added to dependencies_info - continue; - } - removeResolvedDependency(info_it, new_independent_database_objects); - } - + startLoadingTables(pool, load_context, tables_to_load[level], level); pool.wait(); - - metadata.independent_database_objects = std::move(new_independent_database_objects); - ++level; - } while (!metadata.independent_database_objects.empty()); - - checkCyclicDependencies(); -} - -DependenciesInfosIter TablesLoader::removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_database_objects) -{ - const QualifiedTableName & table_name = info_it->first; - const DependenciesInfo & info = info_it->second; - if (!info.dependencies.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} is in list of independent tables, but dependencies count is {}." - "It's a bug", table_name, info.dependencies.size()); - if (info.dependent_database_objects.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} does not have dependent tables. It's a bug", table_name); - - /// Decrement number of dependencies for each dependent table - for (const auto & dependent_table : info.dependent_database_objects) - { - auto & dependent_info = metadata.dependencies_info[dependent_table]; - auto & dependencies_set = dependent_info.dependencies; - if (dependencies_set.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to decrement 0 dependencies counter for {}. 
It's a bug", dependent_table); - if (!dependencies_set.erase(table_name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove {} from dependencies set of {}, it contains only {}", - table_name, dependent_table, fmt::join(dependencies_set, ", ")); - if (dependencies_set.empty()) - { - independent_database_objects.push_back(dependent_table); - if (dependent_info.dependent_database_objects.empty()) - metadata.dependencies_info.erase(dependent_table); - } } - - return metadata.dependencies_info.erase(info_it); } -void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level, ContextMutablePtr load_context) +void TablesLoader::startLoadingTables(ThreadPool & pool, ContextMutablePtr load_context, const std::vector & tables_to_load, size_t level) { size_t total_tables = metadata.parsed_tables.size(); - LOG_INFO(log, "Loading {} tables with {} dependency level", metadata.independent_database_objects.size(), level); + LOG_INFO(log, "Loading {} tables with dependency level {}", tables_to_load.size(), level); - for (const auto & table_name : metadata.independent_database_objects) + for (const auto & table_id : tables_to_load) { - pool.scheduleOrThrowOnError([this, load_context, total_tables, &table_name]() + pool.scheduleOrThrowOnError([this, load_context, total_tables, table_name = table_id.getQualifiedName()]() { const auto & path_and_query = metadata.parsed_tables[table_name]; databases[table_name.database]->loadTableFromMetadata(load_context, path_and_query.path, table_name, path_and_query.ast, strictness_mode); @@ -259,47 +198,4 @@ void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level } } -size_t TablesLoader::getNumberOfTablesWithDependencies() const -{ - size_t number_of_tables_with_dependencies = 0; - for (const auto & info : metadata.dependencies_info) - if (!info.second.dependencies.empty()) - ++number_of_tables_with_dependencies; - return number_of_tables_with_dependencies; -} - -void TablesLoader::checkCyclicDependencies() const -{ - /// Loading is finished if all dependencies are resolved - if (metadata.dependencies_info.empty()) - return; - - for (const auto & info : metadata.dependencies_info) - { - LOG_WARNING(log, "Cannot resolve dependencies: Table {} have {} dependencies and {} dependent tables. List of dependent tables: {}", - info.first, info.second.dependencies.size(), - info.second.dependent_database_objects.size(), fmt::join(info.second.dependent_database_objects, ", ")); - assert(info.second.dependencies.empty()); - } - - throw Exception(ErrorCodes::INFINITE_LOOP, "Cannot attach {} tables due to cyclic dependencies. " - "See server log for details.", metadata.dependencies_info.size()); -} - -void TablesLoader::logDependencyGraph() const -{ - LOG_TEST(log, "Have {} independent tables: {}", - metadata.independent_database_objects.size(), - fmt::join(metadata.independent_database_objects, ", ")); - for (const auto & dependencies : metadata.dependencies_info) - { - LOG_TEST(log, - "Table {} have {} dependencies and {} dependent tables. 
List of dependent tables: {}", - dependencies.first, - dependencies.second.dependencies.size(), - dependencies.second.dependent_database_objects.size(), - fmt::join(dependencies.second.dependent_database_objects, ", ")); - } -} - } diff --git a/src/Databases/TablesLoader.h b/src/Databases/TablesLoader.h index 7a29d0e3958..13d404b96ce 100644 --- a/src/Databases/TablesLoader.h +++ b/src/Databases/TablesLoader.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -34,21 +35,6 @@ struct ParsedTableMetadata }; using ParsedMetadata = std::map; -using TableNames = std::vector; -using TableNamesSet = std::unordered_set; - -struct DependenciesInfo -{ - /// Set of dependencies - TableNamesSet dependencies; - /// Set of tables/dictionaries which depend on this table/dictionary - TableNamesSet dependent_database_objects; -}; - -using DependenciesInfos = std::unordered_map; -using DependenciesInfosIter = std::unordered_map::iterator; - -void mergeDependenciesGraphs(DependenciesInfos & main_dependencies_info, const DependenciesInfos & additional_info); struct ParsedTablesMetadata { @@ -59,17 +45,6 @@ struct ParsedTablesMetadata /// For logging size_t total_dictionaries = 0; - - /// List of tables/dictionaries that do not have any dependencies and can be loaded - TableNames independent_database_objects; - - /// Adjacent list of dependency graph, contains two maps - /// 2. table/dictionary name -> dependent tables/dictionaries list (adjacency list of dependencies graph). - /// 1. table/dictionary name -> dependencies of table/dictionary (adjacency list of inverted dependencies graph) - /// If table A depends on table B, then there is an edge B --> A, i.e. dependencies_info[B].dependent_database_objects contains A - /// and dependencies_info[A].dependencies contain B. - /// We need inverted graph to effectively maintain it on DDL queries that can modify the graph. 
- DependenciesInfos dependencies_info; }; /// Loads tables (and dictionaries) from specified databases @@ -92,25 +67,18 @@ private: Strings databases_to_load; ParsedTablesMetadata metadata; + TablesDependencyGraph referential_dependencies; + TablesDependencyGraph loading_dependencies; Poco::Logger * log; std::atomic tables_processed{0}; AtomicStopwatch stopwatch; ThreadPool pool; - void removeUnresolvableDependencies(bool remove_loaded); - + void buildDependencyGraph(); + void removeUnresolvableDependencies(); void loadTablesInTopologicalOrder(ThreadPool & pool); - - DependenciesInfosIter removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_database_objects); - - void startLoadingIndependentTables(ThreadPool & pool, size_t level, ContextMutablePtr load_context); - - void checkCyclicDependencies() const; - - size_t getNumberOfTablesWithDependencies() const; - - void logDependencyGraph() const; + void startLoadingTables(ThreadPool & pool, ContextMutablePtr load_context, const std::vector & tables_to_load, size_t level); }; } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 996268079e8..ed7b8182622 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -480,7 +480,8 @@ void S3ObjectStorage::copyObjectImpl( auto outcome = client_ptr->CopyObject(request); - if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == "EntityTooLarge") + if (!outcome.IsSuccess() && (outcome.GetError().GetExceptionName() == "EntityTooLarge" + || outcome.GetError().GetExceptionName() == "InvalidRequest")) { // Can't come here with MinIO, MinIO allows single part upload for large objects. copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata); return; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a773368b231..fe84d780714 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -180,6 +180,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.try_infer_datetimes = settings.input_format_try_infer_datetimes; format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; + format_settings.max_binary_string_size = settings.format_binary_max_string_size; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index c7c9bfc816c..ad2f05a5819 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -79,6 +79,8 @@ struct FormatSettings UInt64 input_allow_errors_num = 0; Float32 input_allow_errors_ratio = 0; + UInt64 max_binary_string_size = 0; + struct { UInt64 row_group_size = 1000000; diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index e9a4e357b7e..f4163a336ef 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1343,6 +1343,30 @@ struct ToYYYYMMDDhhmmssImpl using FactorTransform = ZeroTransform; }; +struct ToDateTimeComponentsImpl +{ + static constexpr auto name = "toDateTimeComponents"; + + static inline DateLUTImpl::DateTimeComponents execute(Int64 t, const DateLUTImpl & 
time_zone) + { + return time_zone.toDateTimeComponents(t); + } + static inline DateLUTImpl::DateTimeComponents execute(UInt32 t, const DateLUTImpl & time_zone) + { + return time_zone.toDateTimeComponents(static_cast(t)); + } + static inline DateLUTImpl::DateTimeComponents execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toDateTimeComponents(ExtendedDayNum(d)); + } + static inline DateLUTImpl::DateTimeComponents execute(UInt16 d, const DateLUTImpl & time_zone) + { + return time_zone.toDateTimeComponents(DayNum(d)); + } + + using FactorTransform = ZeroTransform; +}; + template struct Transformer diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index cb4b3fbb71d..3dab9efeb6b 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -48,6 +48,10 @@ public: : scale_multiplier(DecimalUtils::scaleMultiplier(scale_)) {} + TransformDateTime64(DateTime64::NativeType scale_multiplier_ = 1) /// NOLINT(google-explicit-constructor) + : scale_multiplier(scale_multiplier_) + {} + template inline auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const { @@ -127,6 +131,8 @@ public: return wrapped_transform.executeExtendedResult(t, std::forward(args)...); } + DateTime64::NativeType getScaleMultiplier() const { return scale_multiplier; } + private: DateTime64::NativeType scale_multiplier = 1; Transform wrapped_transform = {}; diff --git a/src/Functions/URL/cutURLParameter.cpp b/src/Functions/URL/cutURLParameter.cpp index 6077b068bd0..7a2b96ec874 100644 --- a/src/Functions/URL/cutURLParameter.cpp +++ b/src/Functions/URL/cutURLParameter.cpp @@ -1,82 +1,174 @@ +#include +#include +#include +#include #include -#include #include namespace DB { -struct CutURLParameterImpl +namespace ErrorCodes { + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; +} + +class FunctionCutURLParameter : public IFunction +{ +public: + static constexpr auto name = "cutURLParameter"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); + + if (!isString(arguments[1]) && !isArray(arguments[1])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", + arguments[1]->getName(), getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + const ColumnPtr column = arguments[0].column; + const ColumnPtr column_needle = arguments[1].column; + + const ColumnConst * col_needle = typeid_cast(&*column_needle); + const ColumnArray * col_needle_const_array = checkAndGetColumnConstData(column_needle.get()); + + if (!col_needle && !col_needle_const_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Second argument of function {} must be constant string or 
constant array", + getName()); + + if (col_needle_const_array) + { + if (!col_needle_const_array->getData().empty() && typeid_cast(*arguments[1].type).getNestedType()->getTypeId() != TypeIndex::String) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Second argument of function {} must be constant array of strings", + getName()); + } + + if (const ColumnString * col = checkAndGetColumn(column.get())) + { + auto col_res = ColumnString::create(); + + ColumnString::Chars & vec_res = col_res->getChars(); + ColumnString::Offsets & offsets_res = col_res->getOffsets(); + vector(col->getChars(), col->getOffsets(), col_needle, col_needle_const_array, vec_res, offsets_res); + return col_res; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); + } + + static void cutURL(ColumnString::Chars & data, String pattern, size_t prev_offset, size_t & cur_offset) + { + pattern += '='; + const char * param_str = pattern.c_str(); + size_t param_len = pattern.size(); + + const char * url_begin = reinterpret_cast(&data[prev_offset]); + const char * url_end = reinterpret_cast(&data[cur_offset - 2]); + const char * begin_pos = url_begin; + const char * end_pos = begin_pos; + + do + { + const char * query_string_begin = find_first_symbols<'?', '#'>(url_begin, url_end); + if (query_string_begin + 1 >= url_end) + break; + + const char * pos = static_cast(memmem(query_string_begin + 1, url_end - query_string_begin - 1, param_str, param_len)); + if (pos == nullptr) + break; + + if (pos[-1] != '?' && pos[-1] != '#' && pos[-1] != '&') + { + pos = nullptr; + break; + } + + begin_pos = pos; + end_pos = begin_pos + param_len; + + /// Skip the value. + while (*end_pos && *end_pos != '&' && *end_pos != '#') + ++end_pos; + + /// Capture '&' before or after the parameter. 
+ if (*end_pos == '&') + ++end_pos; + else if (begin_pos[-1] == '&') + --begin_pos; + } while (false); + + size_t cut_length = end_pos - begin_pos; + cur_offset -= cut_length; + data.erase(data.begin() + prev_offset + (begin_pos - url_begin), data.begin() + prev_offset+ (end_pos - url_begin)); + } + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, - std::string pattern, + const ColumnConst * col_needle, + const ColumnArray * col_needle_const_array, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { res_data.reserve(data.size()); res_offsets.resize(offsets.size()); - pattern += '='; - const char * param_str = pattern.c_str(); - size_t param_len = pattern.size(); - size_t prev_offset = 0; + size_t cur_offset; + size_t cur_len; size_t res_offset = 0; + size_t cur_res_offset; for (size_t i = 0; i < offsets.size(); ++i) { - size_t cur_offset = offsets[i]; + cur_offset = offsets[i]; + cur_len = cur_offset - prev_offset; + cur_res_offset = res_offset + cur_len; + res_data.resize(cur_res_offset); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset], cur_len); - const char * url_begin = reinterpret_cast(&data[prev_offset]); - const char * url_end = reinterpret_cast(&data[cur_offset]) - 1; - const char * begin_pos = url_begin; - const char * end_pos = begin_pos; - - do + if (col_needle_const_array) { - const char * query_string_begin = find_first_symbols<'?', '#'>(url_begin, url_end); - if (query_string_begin + 1 >= url_end) - break; - - const char * pos = static_cast(memmem(query_string_begin + 1, url_end - query_string_begin - 1, param_str, param_len)); - if (pos == nullptr) - break; - - if (pos[-1] != '?' && pos[-1] != '#' && pos[-1] != '&') + size_t num_needles = col_needle_const_array->getData().size(); + for (size_t j = 0; j < num_needles; ++j) { - pos = nullptr; - break; + auto field = col_needle_const_array->getData()[j]; + cutURL(res_data, field.get(), res_offset, cur_res_offset); } - - begin_pos = pos; - end_pos = begin_pos + param_len; - - /// Skip the value. - while (*end_pos && *end_pos != '&' && *end_pos != '#') - ++end_pos; - - /// Capture '&' before or after the parameter. 
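// A standalone sketch of the matching rules that the rewritten cutURL() above applies per
// row, written against std::string instead of ColumnString. The function name
// cutUrlParameter is illustrative only; the behaviour (the name must follow '?', '#' or
// '&', the value is skipped, and one adjacent '&' is swallowed) mirrors the hunk, not the
// exact production code.
#include <cassert>
#include <string>

std::string cutUrlParameter(std::string url, const std::string & name)
{
    const std::string pattern = name + '=';

    /// The parameter can only live in the query string, after '?' or '#'.
    auto query_begin = url.find_first_of("?#");
    if (query_begin == std::string::npos || query_begin + 1 >= url.size())
        return url;

    auto pos = url.find(pattern, query_begin + 1);
    if (pos == std::string::npos)
        return url;

    /// It must be a whole parameter name, i.e. preceded by '?', '#' or '&'.
    char prev = url[pos - 1];
    if (prev != '?' && prev != '#' && prev != '&')
        return url;

    /// Skip the value.
    auto end = pos + pattern.size();
    while (end < url.size() && url[end] != '&' && url[end] != '#')
        ++end;

    /// Capture one '&' either after or before the parameter, so no dangling separator is left.
    if (end < url.size() && url[end] == '&')
        ++end;
    else if (prev == '&')
        --pos;

    url.erase(pos, end - pos);
    return url;
}

int main()
{
    assert(cutUrlParameter("http://example.com/?a=1&b=2&c=3", "b") == "http://example.com/?a=1&c=3");
    assert(cutUrlParameter("http://example.com/?a=1&b=2", "b") == "http://example.com/?a=1");
    assert(cutUrlParameter("http://example.com/?a=1", "x") == "http://example.com/?a=1");
}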
- if (*end_pos == '&') - ++end_pos; - else if (begin_pos[-1] == '&') - --begin_pos; - } while (false); - - size_t cut_length = (url_end - url_begin) - (end_pos - begin_pos); - res_data.resize(res_offset + cut_length + 1); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], url_begin, begin_pos - url_begin); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset] + (begin_pos - url_begin), end_pos, url_end - end_pos); - res_offset += cut_length + 1; - res_data[res_offset - 1] = 0; - res_offsets[i] = res_offset; - + } + else + { + cutURL(res_data, col_needle->getValue(), res_offset, cur_res_offset); + } + res_offsets[i] = cur_res_offset; + res_offset = cur_res_offset; prev_offset = cur_offset; } } }; -struct NameCutURLParameter { static constexpr auto name = "cutURLParameter"; }; -using FunctionCutURLParameter = FunctionsStringSearchToString; - REGISTER_FUNCTION(CutURLParameter) { factory.registerFunction(); diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp index 0a5aa657a89..d78a8623a18 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp @@ -31,7 +31,17 @@ void UserDefinedSQLFunctionVisitor::visit(ASTPtr & ast) auto * old_value = child.get(); visit(child); - ast->setOrReplace(old_value, child); + + // child did not change + if (old_value == child.get()) + return; + + // child changed, we need to modify it in the list of children of the parent also + for (auto & current_child : ast->children) + { + if (current_child.get() == old_value) + current_child = child; + } }; if (auto * col_decl = ast->as()) diff --git a/src/Functions/array/arrayFirstLast.cpp b/src/Functions/array/arrayFirstLast.cpp index 8160234a6b0..fa72ecba161 100644 --- a/src/Functions/array/arrayFirstLast.cpp +++ b/src/Functions/array/arrayFirstLast.cpp @@ -43,6 +43,16 @@ struct ArrayFirstLastImpl return array_element; } + static ColumnPtr createNullableColumn(MutableColumnPtr && column, ColumnUInt8::MutablePtr && null_map) + { + if (auto * nullable_column = typeid_cast(column.get())) + { + nullable_column->applyNullMap(*null_map); + return std::move(column); + } + return ColumnNullable::create(std::move(column), std::move(null_map)); + } + static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped) { const auto * column_filter = typeid_cast(&*mapped); @@ -94,7 +104,7 @@ struct ArrayFirstLastImpl } if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null) - return ColumnNullable::create(std::move(out), std::move(col_null_map_to)); + return createNullableColumn(std::move(out), std::move(col_null_map_to)); return out; } @@ -106,7 +116,7 @@ struct ArrayFirstLastImpl if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null) { auto col_null_map_to = ColumnUInt8::create(out->size(), true); - return ColumnNullable::create(std::move(out), std::move(col_null_map_to)); + return createNullableColumn(std::move(out), std::move(col_null_map_to)); } return out; @@ -172,7 +182,7 @@ struct ArrayFirstLastImpl } if constexpr (element_not_exists_strategy == ArrayFirstLastElementNotExistsStrategy::Null) - return ColumnNullable::create(std::move(out), std::move(col_null_map_to)); + return createNullableColumn(std::move(out), std::move(col_null_map_to)); return out; } diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index ec9c9df8e49..60668f81edf 100644 --- 
a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +35,7 @@ namespace ErrorCodes namespace { +template class DateDiffImpl { public: @@ -165,8 +167,92 @@ public: template Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const { - return static_cast(transform_y.execute(y, timezone_y)) + if constexpr (is_diff) + return static_cast(transform_y.execute(y, timezone_y)) - static_cast(transform_x.execute(x, timezone_x)); + else + { + auto res = static_cast(transform_y.execute(y, timezone_y)) + - static_cast(transform_x.execute(x, timezone_x)); + DateLUTImpl::DateTimeComponents a_comp; + DateLUTImpl::DateTimeComponents b_comp; + Int64 adjust_value; + auto x_seconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); + auto y_seconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); + if (x_seconds <= y_seconds) + { + a_comp = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); + b_comp = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); + adjust_value = -1; + } + else + { + a_comp = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); + b_comp = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); + adjust_value = 1; + } + + if constexpr (std::is_same_v>>) + { + if ((a_comp.date.month > b_comp.date.month) + || ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day) + || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) + ))))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + auto x_month_in_quarter = (a_comp.date.month - 1) % 3; + auto y_month_in_quarter = (b_comp.date.month - 1) % 3; + if ((x_month_in_quarter > y_month_in_quarter) + || ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day) + || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) + ))))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if ((a_comp.date.day > b_comp.date.day) + || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) + ))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); + auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); + if ((x_day_of_week > y_day_of_week) + || ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour)) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))) + res += adjust_value; + } 
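// A standalone sketch of the adjustment that separates age() from dateDiff() in the
// branches above, shown for the 'month' unit only: dateDiff() subtracts relative month
// numbers, while age() additionally drops one unit when the later point has not yet
// reached the earlier point's day/time within the month. Components, monthDiff and
// monthAge are illustrative names, not ClickHouse types.
#include <cassert>
#include <tuple>

struct Components { int year, month, day, hour, minute, second; };

int relativeMonthNumber(const Components & c) { return c.year * 12 + c.month; }

/// dateDiff('month', x, y): plain difference of relative month numbers.
int monthDiff(const Components & x, const Components & y)
{
    return relativeMonthNumber(y) - relativeMonthNumber(x);
}

/// age('month', x, y): like monthDiff, but an incomplete trailing month is not counted.
int monthAge(const Components & x, const Components & y)
{
    int res = monthDiff(x, y);

    auto all   = [](const Components & c) { return std::tie(c.year, c.month, c.day, c.hour, c.minute, c.second); };
    auto below = [](const Components & c) { return std::tie(c.day, c.hour, c.minute, c.second); };  /// everything finer than a month

    bool x_is_earlier = all(x) <= all(y);
    const Components & earlier = x_is_earlier ? x : y;
    const Components & later   = x_is_earlier ? y : x;
    int adjust = x_is_earlier ? -1 : 1;

    /// If the later point has not yet reached the earlier point's position inside the month,
    /// the last month is not fully elapsed, so move the result one unit towards zero.
    if (below(earlier) > below(later))
        res += adjust;
    return res;
}

int main()
{
    Components jan31{2022, 1, 31, 0, 0, 0};
    Components feb28{2022, 2, 28, 23, 59, 59};
    assert(monthDiff(jan31, feb28) == 1);   /// a calendar month boundary was crossed
    assert(monthAge(jan31, feb28) == 0);    /// but a full month has not elapsed yet
    assert(monthAge(feb28, jan31) == 0);    /// and the same holds with the arguments swapped
}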
+ else if constexpr (std::is_same_v>>) + { + if ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if (a_comp.time.second > b_comp.time.second) + res += adjust_value; + } + return res; + } } template @@ -193,7 +279,8 @@ private: /** dateDiff('unit', t1, t2, [timezone]) - * t1 and t2 can be Date or DateTime + * age('unit', t1, t2, [timezone]) + * t1 and t2 can be Date, Date32, DateTime or DateTime64 * * If timezone is specified, it applied to both arguments. * If not, timezones from datatypes t1 and t2 are used. @@ -201,10 +288,11 @@ private: * * Timezone matters because days can have different length. */ +template class FunctionDateDiff : public IFunction { public: - static constexpr auto name = "dateDiff"; + static constexpr auto name = is_relative ? "dateDiff" : "age"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override @@ -270,21 +358,21 @@ public: const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "yy" || unit == "yyyy") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "quarter" || unit == "qq" || unit == "q") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "month" || unit == "mm" || unit == "m") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "week" || unit == "wk" || unit == "ww") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "day" || unit == "dd" || unit == "d") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "hour" || unit == "hh" || unit == "h") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "minute" || unit == "mi" || unit == "n") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "second" || unit == "ss" || unit == "s") - impl.dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); @@ -292,7 +380,7 @@ public: return res; } private: - DateDiffImpl impl{name}; + DateDiffImpl impl{name}; }; @@ -352,14 +440,14 @@ public: return res; } private: - DateDiffImpl impl{name}; + DateDiffImpl impl{name}; }; } REGISTER_FUNCTION(DateDiff) { - 
factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::CaseInsensitive); } REGISTER_FUNCTION(TimeDiff) @@ -376,4 +464,9 @@ Example: Documentation::Categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive); } +REGISTER_FUNCTION(Age) +{ + factory.registerFunction>({}, FunctionFactory::CaseInsensitive); +} + } diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp index b3f1d5ae460..7471fdacbb5 100644 --- a/src/Functions/translate.cpp +++ b/src/Functions/translate.cpp @@ -27,14 +27,14 @@ struct TranslateImpl const std::string & map_to) { if (map_from.size() != map_to.size()) - throw Exception("Second and trird arguments must be the same length", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Second and third arguments must be the same length", ErrorCodes::BAD_ARGUMENTS); std::iota(map.begin(), map.end(), 0); for (size_t i = 0; i < map_from.size(); ++i) { if (!isASCII(map_from[i]) || !isASCII(map_to[i])) - throw Exception("Second and trird arguments must be ASCII strings", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Second and third arguments must be ASCII strings", ErrorCodes::BAD_ARGUMENTS); map[map_from[i]] = map_to[i]; } @@ -125,7 +125,7 @@ struct TranslateUTF8Impl auto map_to_size = UTF8::countCodePoints(reinterpret_cast(map_to.data()), map_to.size()); if (map_from_size != map_to_size) - throw Exception("Second and trird arguments must be the same length", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Second and third arguments must be the same length", ErrorCodes::BAD_ARGUMENTS); std::iota(map_ascii.begin(), map_ascii.end(), 0); diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 9abbec1a53c..05e97b35956 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -334,7 +334,7 @@ std::string HTTPException::makeExceptionMessage( "Received error from remote server {}. " "HTTP status code: {} {}, " "body: {}", - uri, http_status, reason, body); + uri, static_cast(http_status), reason, body); } } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 69d75f28960..905361d5e00 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -488,7 +488,6 @@ Aggregator::AggregateColumnsConstData Aggregator::Params::makeAggregateColumnsDa void Aggregator::Params::explain(WriteBuffer & out, size_t indent) const { - Strings res; String prefix(indent, ' '); { diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index 6176bb781ab..eec5da802a7 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index 8a19fae29e9..1937aa09dbd 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 6f5de6d6e5a..8ea6298c50b 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -16,31 +16,6 @@ #include #include -using namespace DB; - -namespace -{ - -/// We determine output stream sort properties by a local plan (local because otherwise table could be unknown). 
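// The translate() hunks above only correct "trird" to "third" in the error messages, but
// the surrounding ASCII implementation shows the underlying technique: an identity byte
// map seeded with std::iota and then overridden for the replaced characters. A standalone
// sketch with the same validation; translateAscii is an illustrative name, not the
// production implementation.
#include <array>
#include <cassert>
#include <cstddef>
#include <numeric>
#include <stdexcept>
#include <string>

std::string translateAscii(std::string input, const std::string & map_from, const std::string & map_to)
{
    if (map_from.size() != map_to.size())
        throw std::invalid_argument("Second and third arguments must be the same length");

    /// Identity mapping for every byte value, then override the characters being replaced.
    std::array<unsigned char, 256> map{};
    std::iota(map.begin(), map.end(), 0);
    for (std::size_t i = 0; i < map_from.size(); ++i)
    {
        unsigned char from = static_cast<unsigned char>(map_from[i]);
        unsigned char to = static_cast<unsigned char>(map_to[i]);
        if (from >= 128 || to >= 128)
            throw std::invalid_argument("Second and third arguments must be ASCII strings");
        map[from] = to;
    }

    for (char & c : input)
        c = static_cast<char>(map[static_cast<unsigned char>(c)]);
    return input;
}

int main()
{
    assert(translateAscii("clickhouse", "ou", "01") == "clickh01se");
}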
-/// If no local shard exist for this cluster, no sort properties will be provided, c'est la vie. -auto getRemoteShardsOutputStreamSortingProperties(const std::vector & plans, ContextMutablePtr context) -{ - SortDescription sort_description; - DataStream::SortScope sort_scope = DataStream::SortScope::None; - if (!plans.empty()) - { - if (const auto * step = dynamic_cast(plans.front()->getRootNode()->step.get()); - step && step->getDataStreamTraits().can_enforce_sorting_properties_in_distributed_query) - { - step->adjustSettingsToEnforceSortingPropertiesInDistributedQuery(context); - sort_description = step->getOutputStream().sort_description; - sort_scope = step->getOutputStream().sort_scope; - } - } - return std::make_pair(sort_description, sort_scope); -} -} - namespace DB { @@ -216,8 +191,6 @@ void executeQuery( "_shard_count", Block{{DataTypeUInt32().createColumnConst(1, shards), std::make_shared(), "_shard_count"}}); auto external_tables = context->getExternalTables(); - auto && [sort_description, sort_scope] = getRemoteShardsOutputStreamSortingProperties(plans, new_context); - auto plan = std::make_unique(); auto read_from_remote = std::make_unique( std::move(remote_shards), @@ -231,9 +204,7 @@ void executeQuery( std::move(external_tables), log, shards, - query_info.storage_limits, - std::move(sort_description), - std::move(sort_scope)); + query_info.storage_limits); read_from_remote->setStepDescription("Read from remote replica"); plan->addStep(std::move(read_from_remote)); @@ -329,7 +300,6 @@ void executeQueryWithParallelReplicas( if (!remote_shards.empty()) { auto new_context = Context::createCopy(context); - auto && [sort_description, sort_scope] = getRemoteShardsOutputStreamSortingProperties(plans, new_context); for (const auto & shard : remote_shards) { @@ -345,9 +315,7 @@ void executeQueryWithParallelReplicas( scalars, external_tables, &Poco::Logger::get("ReadFromParallelRemoteReplicasStep"), - query_info.storage_limits, - sort_description, - sort_scope); + query_info.storage_limits); auto remote_plan = std::make_unique(); remote_plan->addStep(std::move(read_from_remote)); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 913b0535358..4b62f4df757 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1525,9 +1525,9 @@ void Context::setCurrentQueryId(const String & query_id) client_info.initial_query_id = client_info.current_query_id; } -void Context::killCurrentQuery() +void Context::killCurrentQuery() const { - if (auto elem = process_list_elem.lock()) + if (auto elem = getProcessListElement()) elem->cancelQuery(true); } @@ -1782,11 +1782,16 @@ void Context::setProcessListElement(QueryStatusPtr elem) { /// Set to a session or query. In the session, only one query is processed at a time. Therefore, the lock is not needed. 
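// The Context change below pairs the weak_ptr with a bool because an empty weak_ptr and an
// expired one are indistinguishable through lock() alone. A standalone sketch of that
// pattern (TrackedRef is an illustrative name) with the same three outcomes: never set
// returns empty, alive returns the shared_ptr, set-but-expired is reported as a logic error.
#include <cassert>
#include <memory>
#include <stdexcept>

template <typename T>
class TrackedRef
{
public:
    void set(std::shared_ptr<T> value)
    {
        ref = value;
        was_set = static_cast<bool>(value);
    }

    std::shared_ptr<T> get() const
    {
        if (!was_set)
            return {};                      /// never assigned: a normal situation
        if (auto res = ref.lock())
            return res;                     /// assigned and still alive
        throw std::logic_error("Weak pointer expired while still in use, it's a bug");
    }

private:
    std::weak_ptr<T> ref;
    bool was_set = false;
};

int main()
{
    TrackedRef<int> holder;
    assert(holder.get() == nullptr);        /// not set yet

    auto value = std::make_shared<int>(42);
    holder.set(value);
    assert(*holder.get() == 42);            /// alive

    value.reset();                          /// the owner died while the holder still points at it
    bool threw = false;
    try { holder.get(); } catch (const std::logic_error &) { threw = true; }
    assert(threw);
}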
process_list_elem = elem; + has_process_list_elem = elem.get(); } QueryStatusPtr Context::getProcessListElement() const { - return process_list_elem.lock(); + if (!has_process_list_elem) + return {}; + if (auto res = process_list_elem.lock()) + return res; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Weak pointer to process_list_elem expired during query execution, it's a bug"); } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index bc89ce36edc..2b12b476739 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -14,11 +14,14 @@ #include #include #include +#include #include #include #include #include +#include + #include "config.h" @@ -236,6 +239,7 @@ private: FileProgressCallback file_progress_callback; /// Callback for tracking progress of file loading. std::weak_ptr process_list_elem; /// For tracking total resource usage for query. + bool has_process_list_elem = false; /// It's impossible to check if weak_ptr was initialized or not StorageID insertion_table = StorageID::createEmpty(); /// Saved insertion table in query context bool is_distributed = false; /// Whether the current context it used for distributed query @@ -626,7 +630,7 @@ public: void setCurrentDatabaseNameInGlobalContext(const String & name); void setCurrentQueryId(const String & query_id); - void killCurrentQuery(); + void killCurrentQuery() const; bool hasInsertionTable() const { return !insertion_table.empty(); } void setInsertionTable(StorageID db_and_table) { insertion_table = std::move(db_and_table); } @@ -1077,4 +1081,53 @@ private: DiskSelectorPtr getDiskSelector(std::lock_guard & /* lock */) const; }; +struct HTTPContext : public IHTTPContext +{ + explicit HTTPContext(ContextPtr context_) + : context(Context::createCopy(context_)) + {} + + uint64_t getMaxHstsAge() const override + { + return context->getSettingsRef().hsts_max_age; + } + + uint64_t getMaxUriSize() const override + { + return context->getSettingsRef().http_max_uri_size; + } + + uint64_t getMaxFields() const override + { + return context->getSettingsRef().http_max_fields; + } + + uint64_t getMaxFieldNameSize() const override + { + return context->getSettingsRef().http_max_field_name_size; + } + + uint64_t getMaxFieldValueSize() const override + { + return context->getSettingsRef().http_max_field_value_size; + } + + uint64_t getMaxChunkSize() const override + { + return context->getSettingsRef().http_max_chunk_size; + } + + Poco::Timespan getReceiveTimeout() const override + { + return context->getSettingsRef().http_receive_timeout; + } + + Poco::Timespan getSendTimeout() const override + { + return context->getSettingsRef().http_send_timeout; + } + + ContextPtr context; +}; + } diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 9015f2eeee2..a76b13e5dcf 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -223,6 +223,7 @@ void DatabaseCatalog::shutdownImpl() return it != elem.map.end(); }) == uuid_map.end()); databases.clear(); + referential_dependencies.clear(); view_dependencies.clear(); } @@ -473,13 +474,8 @@ void DatabaseCatalog::updateDatabaseName(const String & old_name, const String & for (const auto & table_name : tables_in_database) { - QualifiedTableName new_table_name{new_name, table_name}; - auto dependencies = tryRemoveLoadingDependenciesUnlocked(QualifiedTableName{old_name, table_name}, /* check_dependencies */ false); - DependenciesInfos new_info; - for (const auto & dependency : dependencies) - 
new_info[dependency].dependent_database_objects.insert(new_table_name); - new_info[new_table_name].dependencies = std::move(dependencies); - mergeDependenciesGraphs(loading_dependencies, new_info); + auto dependencies = referential_dependencies.removeDependencies(StorageID{old_name, table_name}, /* remove_isolated_tables= */ true); + referential_dependencies.addDependencies(StorageID{new_name, table_name}, dependencies); } } @@ -648,7 +644,10 @@ bool DatabaseCatalog::hasUUIDMapping(const UUID & uuid) std::unique_ptr DatabaseCatalog::database_catalog; DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_) - : WithMutableContext(global_context_), log(&Poco::Logger::get("DatabaseCatalog")) + : WithMutableContext(global_context_) + , referential_dependencies{"ReferentialDeps"} + , view_dependencies{"ViewDeps"} + , log(&Poco::Logger::get("DatabaseCatalog")) { } @@ -692,39 +691,33 @@ DatabasePtr DatabaseCatalog::getDatabase(const String & database_name, ContextPt return getDatabase(resolved_database); } -void DatabaseCatalog::addDependency(const StorageID & from, const StorageID & where) +void DatabaseCatalog::addViewDependency(const StorageID & source_table_id, const StorageID & view_id) { std::lock_guard lock{databases_mutex}; - // FIXME when loading metadata storage may not know UUIDs of it's dependencies, because they are not loaded yet, - // so UUID of `from` is not used here. (same for remove, get and update) - view_dependencies[{from.getDatabaseName(), from.getTableName()}].insert(where); + view_dependencies.addDependency(source_table_id, view_id); } -void DatabaseCatalog::removeDependency(const StorageID & from, const StorageID & where) +void DatabaseCatalog::removeViewDependency(const StorageID & source_table_id, const StorageID & view_id) { std::lock_guard lock{databases_mutex}; - view_dependencies[{from.getDatabaseName(), from.getTableName()}].erase(where); + view_dependencies.removeDependency(source_table_id, view_id, /* remove_isolated_tables= */ true); } -Dependencies DatabaseCatalog::getDependencies(const StorageID & from) const +std::vector DatabaseCatalog::getDependentViews(const StorageID & source_table_id) const { std::lock_guard lock{databases_mutex}; - auto iter = view_dependencies.find({from.getDatabaseName(), from.getTableName()}); - if (iter == view_dependencies.end()) - return {}; - return Dependencies(iter->second.begin(), iter->second.end()); + return view_dependencies.getDependencies(source_table_id); } -void -DatabaseCatalog::updateDependency(const StorageID & old_from, const StorageID & old_where, const StorageID & new_from, - const StorageID & new_where) +void DatabaseCatalog::updateViewDependency(const StorageID & old_source_table_id, const StorageID & old_view_id, + const StorageID & new_source_table_id, const StorageID & new_view_id) { std::lock_guard lock{databases_mutex}; - if (!old_from.empty()) - view_dependencies[{old_from.getDatabaseName(), old_from.getTableName()}].erase(old_where); - if (!new_from.empty()) - view_dependencies[{new_from.getDatabaseName(), new_from.getTableName()}].insert(new_where); + if (!old_source_table_id.empty()) + view_dependencies.removeDependency(old_source_table_id, old_view_id, /* remove_isolated_tables= */ true); + if (!new_source_table_id.empty()) + view_dependencies.addDependency(new_source_table_id, new_view_id); } DDLGuardPtr DatabaseCatalog::getDDLGuard(const String & database, const String & table) @@ -869,6 +862,8 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr { 
chassert(hasUUIDMapping(table_id.uuid)); drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + /// Do not postpone removal of in-memory tables + ignore_delay = ignore_delay || !table->storesDataOnDisk(); table->is_dropped = true; } else @@ -1048,121 +1043,79 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) }); } -void DatabaseCatalog::addLoadingDependencies(const QualifiedTableName & table, TableNamesSet && dependencies) -{ - DependenciesInfos new_info; - for (const auto & dependency : dependencies) - new_info[dependency].dependent_database_objects.insert(table); - new_info[table].dependencies = std::move(dependencies); - addLoadingDependencies(new_info); -} - -void DatabaseCatalog::addLoadingDependencies(const DependenciesInfos & new_infos) +void DatabaseCatalog::addDependencies(const StorageID & table_id, const std::vector & dependencies) { std::lock_guard lock{databases_mutex}; - mergeDependenciesGraphs(loading_dependencies, new_infos); + referential_dependencies.addDependencies(table_id, dependencies); } -DependenciesInfo DatabaseCatalog::getLoadingDependenciesInfo(const StorageID & table_id) const +void DatabaseCatalog::addDependencies(const QualifiedTableName & table_name, const TableNamesSet & dependencies) { std::lock_guard lock{databases_mutex}; - auto it = loading_dependencies.find(table_id.getQualifiedName()); - if (it == loading_dependencies.end()) - return {}; - return it->second; + referential_dependencies.addDependencies(table_name, dependencies); } -TableNamesSet DatabaseCatalog::tryRemoveLoadingDependencies(const StorageID & table_id, bool check_dependencies, bool is_drop_database) +void DatabaseCatalog::addDependencies(const TablesDependencyGraph & extra_graph) { - QualifiedTableName removing_table = table_id.getQualifiedName(); std::lock_guard lock{databases_mutex}; - return tryRemoveLoadingDependenciesUnlocked(removing_table, check_dependencies, is_drop_database); + referential_dependencies.mergeWith(extra_graph); } -TableNamesSet DatabaseCatalog::tryRemoveLoadingDependenciesUnlocked(const QualifiedTableName & removing_table, bool check_dependencies, bool is_drop_database) +std::vector DatabaseCatalog::getDependencies(const StorageID & table_id) const { - auto it = loading_dependencies.find(removing_table); - if (it == loading_dependencies.end()) - return {}; + std::lock_guard lock{databases_mutex}; + return referential_dependencies.getDependencies(table_id); +} - TableNamesSet & dependent = it->second.dependent_database_objects; - if (!dependent.empty()) - { - if (check_dependencies) - checkTableCanBeRemovedOrRenamedImpl(dependent, removing_table, is_drop_database); +std::vector DatabaseCatalog::getDependents(const StorageID & table_id) const +{ + std::lock_guard lock{databases_mutex}; + return referential_dependencies.getDependents(table_id); +} - for (const auto & table : dependent) - { - [[maybe_unused]] bool removed = loading_dependencies[table].dependencies.erase(removing_table); - assert(removed); - } - dependent.clear(); - } - - TableNamesSet dependencies = it->second.dependencies; - for (const auto & table : dependencies) - { - [[maybe_unused]] bool removed = loading_dependencies[table].dependent_database_objects.erase(removing_table); - assert(removed); - } - - loading_dependencies.erase(it); - return dependencies; +std::vector DatabaseCatalog::removeDependencies(const StorageID & table_id, bool check_dependencies, bool is_drop_database) +{ + std::lock_guard lock{databases_mutex}; + if (check_dependencies) + 
checkTableCanBeRemovedOrRenamedUnlocked(table_id, is_drop_database); + return referential_dependencies.removeDependencies(table_id, /* remove_isolated_tables= */ true); } void DatabaseCatalog::checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool is_drop_database) const { - QualifiedTableName removing_table = table_id.getQualifiedName(); std::lock_guard lock{databases_mutex}; - auto it = loading_dependencies.find(removing_table); - if (it == loading_dependencies.end()) - return; - - const TableNamesSet & dependent = it->second.dependent_database_objects; - checkTableCanBeRemovedOrRenamedImpl(dependent, removing_table, is_drop_database); + return checkTableCanBeRemovedOrRenamedUnlocked(table_id, is_drop_database); } -void DatabaseCatalog::checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database) +void DatabaseCatalog::checkTableCanBeRemovedOrRenamedUnlocked(const StorageID & removing_table, bool is_drop_database) const { + const auto & dependents = referential_dependencies.getDependents(removing_table); + if (!is_drop_database) { - if (!dependent.empty()) + if (!dependents.empty()) throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}", - removing_table, fmt::join(dependent, ", ")); + removing_table, fmt::join(dependents, ", ")); + return; } /// For DROP DATABASE we should ignore dependent tables from the same database. /// TODO unload tables in reverse topological order and remove this code - TableNames from_other_databases; - for (const auto & table : dependent) - if (table.database != removing_table.database) - from_other_databases.push_back(table); + std::vector from_other_databases; + for (const auto & dependent : dependents) + if (dependent.database_name != removing_table.database_name) + from_other_databases.push_back(dependent); if (!from_other_databases.empty()) throw Exception(ErrorCodes::HAVE_DEPENDENT_OBJECTS, "Cannot drop or rename {}, because some tables depend on it: {}", removing_table, fmt::join(from_other_databases, ", ")); } -void DatabaseCatalog::updateLoadingDependencies(const StorageID & table_id, TableNamesSet && new_dependencies) +void DatabaseCatalog::updateDependencies(const StorageID & table_id, const TableNamesSet & new_dependencies) { - if (new_dependencies.empty()) - return; - QualifiedTableName table_name = table_id.getQualifiedName(); std::lock_guard lock{databases_mutex}; - auto it = loading_dependencies.find(table_name); - if (it == loading_dependencies.end()) - it = loading_dependencies.emplace(table_name, DependenciesInfo{}).first; - - auto & old_dependencies = it->second.dependencies; - for (const auto & dependency : old_dependencies) - if (!new_dependencies.contains(dependency)) - loading_dependencies[dependency].dependent_database_objects.erase(table_name); - - for (const auto & dependency : new_dependencies) - if (!old_dependencies.contains(dependency)) - loading_dependencies[dependency].dependent_database_objects.insert(table_name); - - old_dependencies = std::move(new_dependencies); + referential_dependencies.removeDependencies(table_id, /* remove_isolated_tables= */ true); + referential_dependencies.addDependencies(table_id, new_dependencies); } void DatabaseCatalog::cleanupStoreDirectoryTask() diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index a44099b9fdc..a3fa4515a69 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ 
-3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -37,11 +37,7 @@ using DatabasePtr = std::shared_ptr; using DatabaseAndTable = std::pair; using Databases = std::map>; using DiskPtr = std::shared_ptr; - -/// Table -> set of table-views that make SELECT from it. -using ViewDependencies = std::map>; -using Dependencies = std::vector; - +using TableNamesSet = std::unordered_set; /// Allows executing DDL query only in one thread. /// Puts an element into the map, locks tables's mutex, counts how much threads run parallel query on the table, @@ -188,12 +184,11 @@ public: /// Four views (tables, views, columns, schemata) in the "information_schema" database are predefined too. bool isPredefinedTable(const StorageID & table_id) const; - void addDependency(const StorageID & from, const StorageID & where); - void removeDependency(const StorageID & from, const StorageID & where); - Dependencies getDependencies(const StorageID & from) const; - - /// For Materialized and Live View - void updateDependency(const StorageID & old_from, const StorageID & old_where,const StorageID & new_from, const StorageID & new_where); + /// View dependencies between a source table and its view. + void addViewDependency(const StorageID & source_table_id, const StorageID & view_id); + void removeViewDependency(const StorageID & source_table_id, const StorageID & view_id); + std::vector getDependentViews(const StorageID & source_table_id) const; + void updateViewDependency(const StorageID & old_source_table_id, const StorageID & old_view_id, const StorageID & new_source_table_id, const StorageID & new_view_id); /// If table has UUID, addUUIDMapping(...) must be called when table attached to some database /// removeUUIDMapping(...) must be called when it detached, @@ -223,16 +218,20 @@ public: void waitTableFinallyDropped(const UUID & uuid); - void addLoadingDependencies(const QualifiedTableName & table, TableNamesSet && dependencies); - void addLoadingDependencies(const DependenciesInfos & new_infos); - DependenciesInfo getLoadingDependenciesInfo(const StorageID & table_id) const; + /// Referential dependencies between tables: table "A" depends on table "B" + /// if "B" is referenced in the definition of "A". + void addDependencies(const StorageID & table_id, const std::vector & dependencies); + void addDependencies(const QualifiedTableName & table_name, const TableNamesSet & dependencies); + void addDependencies(const TablesDependencyGraph & extra_graph); + std::vector removeDependencies(const StorageID & table_id, bool check_dependencies, bool is_drop_database = false); + + std::vector getDependencies(const StorageID & table_id) const; + std::vector getDependents(const StorageID & table_id) const; + + void updateDependencies(const StorageID & table_id, const TableNamesSet & new_dependencies); - TableNamesSet tryRemoveLoadingDependencies(const StorageID & table_id, bool check_dependencies, bool is_drop_database = false); - TableNamesSet tryRemoveLoadingDependenciesUnlocked(const QualifiedTableName & removing_table, bool check_dependencies, bool is_drop_database = false) TSA_REQUIRES(databases_mutex); void checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool is_drop_database = false) const; - void updateLoadingDependencies(const StorageID & table_id, TableNamesSet && new_dependencies); - private: // The global instance of database catalog. unique_ptr is to allow // deferred initialization. 
Thought I'd use std::optional, but I can't @@ -245,7 +244,7 @@ private: void shutdownImpl(); - static void checkTableCanBeRemovedOrRenamedImpl(const TableNamesSet & dependent, const QualifiedTableName & removing_table, bool is_drop_database); + void checkTableCanBeRemovedOrRenamedUnlocked(const StorageID & removing_table, bool is_drop_database) const TSA_REQUIRES(databases_mutex); struct UUIDToStorageMapPart { @@ -281,12 +280,15 @@ private: mutable std::mutex databases_mutex; - ViewDependencies view_dependencies TSA_GUARDED_BY(databases_mutex); - Databases databases TSA_GUARDED_BY(databases_mutex); UUIDToStorageMap uuid_map; - DependenciesInfos loading_dependencies TSA_GUARDED_BY(databases_mutex); + /// Referential dependencies between tables: table "A" depends on table "B" + /// if the table "B" is referenced in the definition of the table "A". + TablesDependencyGraph referential_dependencies TSA_GUARDED_BY(databases_mutex); + + /// View dependencies between a source table and its view. + TablesDependencyGraph view_dependencies TSA_GUARDED_BY(databases_mutex); Poco::Logger * log; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 17788fce53f..84d1c3d9e8a 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1960,6 +1960,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( /// TODO correct conditions optimize_aggregation_in_order = context->getSettingsRef().optimize_aggregation_in_order + && (!context->getSettingsRef().query_plan_aggregation_in_order) && storage && query.groupBy(); query_analyzer.appendGroupBy(chain, only_types || !first_stage, optimize_aggregation_in_order, group_by_elements_actions); diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp new file mode 100644 index 00000000000..cda91cd4ba1 --- /dev/null +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -0,0 +1,28 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +BlockIO InterpreterAlterNamedCollectionQuery::execute() +{ + auto current_context = getContext(); + current_context->checkAccess(AccessType::ALTER_NAMED_COLLECTION); + + const auto & query = query_ptr->as(); + if (!query.cluster.empty()) + { + DDLQueryOnClusterParams params; + return executeDDLQueryOnCluster(query_ptr, current_context, params); + } + + NamedCollectionUtils::updateFromSQL(query, current_context); + return {}; +} + +} diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.h b/src/Interpreters/InterpreterAlterNamedCollectionQuery.h new file mode 100644 index 00000000000..889a41f2cb5 --- /dev/null +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +namespace DB +{ + +class Context; + +class InterpreterAlterNamedCollectionQuery : public IInterpreter, WithMutableContext +{ +public: + InterpreterAlterNamedCollectionQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_), query_ptr(query_ptr_) {} + + BlockIO execute() override; + +private: + ASTPtr query_ptr; +}; + +} diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp new file mode 100644 index 00000000000..c7397d3d64c --- /dev/null +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -0,0 +1,30 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB 
+{ + +BlockIO InterpreterCreateNamedCollectionQuery::execute() +{ + auto current_context = getContext(); + current_context->checkAccess(AccessType::CREATE_NAMED_COLLECTION); + + const auto & query = query_ptr->as(); + + if (!query.cluster.empty()) + { + DDLQueryOnClusterParams params; + return executeDDLQueryOnCluster(query_ptr, current_context, params); + } + + NamedCollectionUtils::createFromSQL(query, current_context); + return {}; +} + +} diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.h b/src/Interpreters/InterpreterCreateNamedCollectionQuery.h new file mode 100644 index 00000000000..26335f618ad --- /dev/null +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.h @@ -0,0 +1,23 @@ +#pragma once + +#include + + +namespace DB +{ + +class InterpreterCreateNamedCollectionQuery : public IInterpreter, WithMutableContext +{ +public: + InterpreterCreateNamedCollectionQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_), query_ptr(query_ptr_) + { + } + + BlockIO execute() override; + +private: + ASTPtr query_ptr; +}; + +} diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 5a25cbd3a50..50536b66185 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -58,6 +58,7 @@ #include #include #include +#include #include @@ -1234,9 +1235,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) /// If table has dependencies - add them to the graph QualifiedTableName qualified_name{database_name, create.getTable()}; - TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); - if (!loading_dependencies.empty()) - DatabaseCatalog::instance().addLoadingDependencies(qualified_name, std::move(loading_dependencies)); + TableNamesSet dependencies = getLoadingDependenciesFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); + if (!dependencies.empty()) + DatabaseCatalog::instance().addDependencies(qualified_name, dependencies); return fillTableIfNeeded(create); } diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp new file mode 100644 index 00000000000..cb237287dc3 --- /dev/null +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +BlockIO InterpreterDropNamedCollectionQuery::execute() +{ + auto current_context = getContext(); + current_context->checkAccess(AccessType::DROP_NAMED_COLLECTION); + + const auto & query = query_ptr->as(); + if (!query.cluster.empty()) + { + DDLQueryOnClusterParams params; + return executeDDLQueryOnCluster(query_ptr, current_context, params); + } + + if (query.if_exists) + NamedCollectionUtils::removeIfExistsFromSQL(query.collection_name, current_context); + else + NamedCollectionUtils::removeFromSQL(query.collection_name, current_context); + + return {}; +} + +} diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.h b/src/Interpreters/InterpreterDropNamedCollectionQuery.h new file mode 100644 index 00000000000..9158bb455d5 --- /dev/null +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +namespace DB +{ + +class Context; + +class InterpreterDropNamedCollectionQuery : public IInterpreter, WithMutableContext +{ +public: + 
InterpreterDropNamedCollectionQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_), query_ptr(query_ptr_) {} + + BlockIO execute() override; + +private: + ASTPtr query_ptr; +}; + +} diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 2fc733f5608..f237814f879 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -188,8 +188,8 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue if (query.permanently) { /// Server may fail to restart of DETACH PERMANENTLY if table has dependent ones - DatabaseCatalog::instance().tryRemoveLoadingDependencies(table_id, getContext()->getSettingsRef().check_table_dependencies, - is_drop_or_detach_database); + DatabaseCatalog::instance().removeDependencies(table_id, getContext()->getSettingsRef().check_table_dependencies, + is_drop_or_detach_database); /// Drop table from memory, don't touch data, metadata file renamed and will be skipped during server restart database->detachTablePermanently(context_, table_id.table_name); } @@ -243,8 +243,8 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue if (database->getUUID() == UUIDHelpers::Nil) table_lock = table->lockExclusively(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout); - DatabaseCatalog::instance().tryRemoveLoadingDependencies(table_id, getContext()->getSettingsRef().check_table_dependencies, - is_drop_or_detach_database); + DatabaseCatalog::instance().removeDependencies(table_id, getContext()->getSettingsRef().check_table_dependencies, + is_drop_or_detach_database); database->dropTable(context_, table_id.table_name, query.sync); /// We have to drop mmapio cache when dropping table from Ordinary database diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 06d5746af59..e62fca2916e 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -21,6 +21,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -47,6 +50,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -230,6 +236,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); @@ -270,6 +280,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); @@ -314,6 +328,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 666a674b2c8..82c230ef8e2 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -124,10 +124,10 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c } else { - TableNamesSet dependencies; + std::vector dependencies; if (!exchange_tables) - dependencies = 
database_catalog.tryRemoveLoadingDependencies(StorageID(elem.from_database_name, elem.from_table_name), - getContext()->getSettingsRef().check_table_dependencies); + dependencies = database_catalog.removeDependencies(StorageID(elem.from_database_name, elem.from_table_name), + getContext()->getSettingsRef().check_table_dependencies); database->renameTable( getContext(), @@ -138,7 +138,7 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c rename.dictionary); if (!dependencies.empty()) - DatabaseCatalog::instance().addLoadingDependencies(QualifiedTableName{elem.to_database_name, elem.to_table_name}, std::move(dependencies)); + DatabaseCatalog::instance().addDependencies(StorageID(elem.to_database_name, elem.to_table_name), dependencies); } } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 4689b234936..9111cad3e16 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2457,9 +2457,13 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac auto grouping_sets_params = getAggregatorGroupingSetsParams(*query_analyzer, keys); SortDescription group_by_sort_description; + SortDescription sort_description_for_merging; if (group_by_info && settings.optimize_aggregation_in_order && !query_analyzer->useGroupingSetKey()) + { group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery()); + sort_description_for_merging = group_by_info->sort_description_for_merging; + } else group_by_info = nullptr; @@ -2481,6 +2485,8 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac group_by_info = std::make_shared( group_by_sort_description, group_by_sort_description.size(), 1 /* direction */, 0 /* limit */); + + sort_description_for_merging = group_by_info->sort_description_for_merging; } auto merge_threads = max_streams; @@ -2504,7 +2510,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac temporary_data_merge_threads, storage_has_evenly_distributed_read, settings.group_by_use_nulls, - std::move(group_by_info), + std::move(sort_description_for_merging), std::move(group_by_sort_description), should_produce_results_in_order_of_bucket_number, settings.enable_memory_bound_merging_of_aggregation_results); diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 2f7134e6b86..a6cea66df84 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -111,7 +111,15 @@ String InterpreterShowTablesQuery::getRewrittenQuery() DatabaseCatalog::instance().assertDatabaseExists(database); WriteBufferFromOwnString rewritten_query; - rewritten_query << "SELECT name FROM system."; + + if (query.full) + { + rewritten_query << "SELECT name, engine FROM system."; + } + else + { + rewritten_query << "SELECT name FROM system."; + } if (query.dictionaries) rewritten_query << "dictionaries "; diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 84f5570349b..cc22ca6597e 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -230,6 +230,7 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as /// Set up memory profiling thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); 
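// The InterpreterShowTablesQuery hunk above makes SHOW FULL TABLES include the engine
// column in the rewritten SELECT. A standalone sketch of that rewrite-to-system-tables
// approach, covering only what the hunk shows (FULL, DICTIONARIES and a database filter);
// rewriteShowTables is an illustrative helper, and the real interpreter builds the query
// incrementally with additional clauses and proper quoting.
#include <cassert>
#include <string>

std::string rewriteShowTables(const std::string & database, bool full, bool dictionaries)
{
    std::string rewritten = full ? "SELECT name, engine FROM system." : "SELECT name FROM system.";
    rewritten += dictionaries ? "dictionaries" : "tables";
    rewritten += " WHERE database = '" + database + "'";
    return rewritten;
}

int main()
{
    assert(rewriteShowTables("default", /*full=*/false, /*dictionaries=*/false)
           == "SELECT name FROM system.tables WHERE database = 'default'");
    assert(rewriteShowTables("default", /*full=*/true, /*dictionaries=*/false)
           == "SELECT name, engine FROM system.tables WHERE database = 'default'");
}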
thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events); } thread_group->memory_tracker.setDescription("(for query)"); diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp new file mode 100644 index 00000000000..dc4a2a8e435 --- /dev/null +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -0,0 +1,395 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace +{ + +template +void calculateMax(Max & max, T x) +{ + if (Max(x) > max) + max = x; +} + +template +void calculateMaxAndSum(Max & max, Sum & sum, T x) +{ + sum += x; + if (Max(x) > max) + max = x; +} + +} + +ServerAsynchronousMetrics::ServerAsynchronousMetrics( + ContextPtr global_context_, + int update_period_seconds, + int heavy_metrics_update_period_seconds, + const ProtocolServerMetricsFunc & protocol_server_metrics_func_) + : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_) + , WithContext(global_context_) + , heavy_metric_update_period(heavy_metrics_update_period_seconds) +{} + +void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) +{ + if (auto mark_cache = getContext()->getMarkCache()) + { + new_values["MarkCacheBytes"] = { mark_cache->weight(), "Total size of mark cache in bytes" }; + new_values["MarkCacheFiles"] = { mark_cache->count(), "Total number of mark files cached in the mark cache" }; + } + + if (auto uncompressed_cache = getContext()->getUncompressedCache()) + { + new_values["UncompressedCacheBytes"] = { uncompressed_cache->weight(), + "Total size of uncompressed cache in bytes. Uncompressed cache does not usually improve the performance and should be mostly avoided." }; + new_values["UncompressedCacheCells"] = { uncompressed_cache->count(), + "Total number of entries in the uncompressed cache. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." }; + } + + if (auto index_mark_cache = getContext()->getIndexMarkCache()) + { + new_values["IndexMarkCacheBytes"] = { index_mark_cache->weight(), "Total size of mark cache for secondary indices in bytes." }; + new_values["IndexMarkCacheFiles"] = { index_mark_cache->count(), "Total number of mark files cached in the mark cache for secondary indices." }; + } + + if (auto index_uncompressed_cache = getContext()->getIndexUncompressedCache()) + { + new_values["IndexUncompressedCacheBytes"] = { index_uncompressed_cache->weight(), + "Total size of uncompressed cache in bytes for secondary indices. Uncompressed cache does not usually improve the performance and should be mostly avoided." }; + new_values["IndexUncompressedCacheCells"] = { index_uncompressed_cache->count(), + "Total number of entries in the uncompressed cache for secondary indices. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." }; + } + + if (auto mmap_cache = getContext()->getMMappedFileCache()) + { + new_values["MMapCacheCells"] = { mmap_cache->count(), + "The number of files opened with `mmap` (mapped in memory)." 
+ " This is used for queries with the setting `local_filesystem_read_method` set to `mmap`." + " The files opened with `mmap` are kept in the cache to avoid costly TLB flushes."}; + } + + { + auto caches = FileCacheFactory::instance().getAll(); + size_t total_bytes = 0; + size_t total_files = 0; + + for (const auto & [_, cache_data] : caches) + { + total_bytes += cache_data->cache->getUsedCacheSize(); + total_files += cache_data->cache->getFileSegmentsNum(); + } + + new_values["FilesystemCacheBytes"] = { total_bytes, + "Total bytes in the `cache` virtual filesystem. This cache is hold on disk." }; + new_values["FilesystemCacheFiles"] = { total_files, + "Total number of cached file segments in the `cache` virtual filesystem. This cache is hold on disk." }; + } + +#if USE_ROCKSDB + if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache()) + { + new_values["MergeTreeMetadataCacheSize"] = { metadata_cache->getEstimateNumKeys(), + "The size of the metadata cache for tables. This cache is experimental and not used in production." }; + } +#endif + +#if USE_EMBEDDED_COMPILER + if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache()) + { + new_values["CompiledExpressionCacheBytes"] = { compiled_expression_cache->weight(), + "Total bytes used for the cache of JIT-compiled code." }; + new_values["CompiledExpressionCacheCount"] = { compiled_expression_cache->count(), + "Total entries in the cache of JIT-compiled code." }; + } +#endif + + new_values["Uptime"] = { getContext()->getUptimeSeconds(), + "The server uptime in seconds. It includes the time spent for server initialization before accepting connections." }; + + if (const auto stats = getHashTablesCacheStatistics()) + { + new_values["HashTableStatsCacheEntries"] = { stats->entries, + "The number of entries in the cache of hash table sizes." + " The cache for hash table sizes is used for predictive optimization of GROUP BY." }; + new_values["HashTableStatsCacheHits"] = { stats->hits, + "The number of times the prediction of a hash table size was correct." }; + new_values["HashTableStatsCacheMisses"] = { stats->misses, + "The number of times the prediction of a hash table size was incorrect." }; + } + + /// Free space in filesystems at data path and logs path. + { + auto stat = getStatVFS(getContext()->getPath()); + + new_values["FilesystemMainPathTotalBytes"] = { stat.f_blocks * stat.f_frsize, + "The size of the volume where the main ClickHouse path is mounted, in bytes." }; + new_values["FilesystemMainPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize, + "Available bytes on the volume where the main ClickHouse path is mounted." }; + new_values["FilesystemMainPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize, + "Used bytes on the volume where the main ClickHouse path is mounted." }; + new_values["FilesystemMainPathTotalINodes"] = { stat.f_files, + "The total number of inodes on the volume where the main ClickHouse path is mounted. If it is less than 25 million, it indicates a misconfiguration." }; + new_values["FilesystemMainPathAvailableINodes"] = { stat.f_favail, + "The number of available inodes on the volume where the main ClickHouse path is mounted. If it is close to zero, it indicates a misconfiguration, and you will get 'no space left on device' even when the disk is not full." }; + new_values["FilesystemMainPathUsedINodes"] = { stat.f_files - stat.f_favail, + "The number of used inodes on the volume where the main ClickHouse path is mounted. 
This value mostly corresponds to the number of files." }; + } + + { + /// Current working directory of the server is the directory with logs. + auto stat = getStatVFS("."); + + new_values["FilesystemLogsPathTotalBytes"] = { stat.f_blocks * stat.f_frsize, + "The size of the volume where ClickHouse logs path is mounted, in bytes. It's recommended to have at least 10 GB for logs." }; + new_values["FilesystemLogsPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize, + "Available bytes on the volume where ClickHouse logs path is mounted. If this value approaches zero, you should tune the log rotation in the configuration file." }; + new_values["FilesystemLogsPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize, + "Used bytes on the volume where ClickHouse logs path is mounted." }; + new_values["FilesystemLogsPathTotalINodes"] = { stat.f_files, + "The total number of inodes on the volume where ClickHouse logs path is mounted." }; + new_values["FilesystemLogsPathAvailableINodes"] = { stat.f_favail, + "The number of available inodes on the volume where ClickHouse logs path is mounted." }; + new_values["FilesystemLogsPathUsedINodes"] = { stat.f_files - stat.f_favail, + "The number of used inodes on the volume where ClickHouse logs path is mounted." }; + } + + /// Free and total space on every configured disk. + { + DisksMap disks_map = getContext()->getDisksMap(); + for (const auto & [name, disk] : disks_map) + { + auto total = disk->getTotalSpace(); + + /// Some disks don't support information about the space. + if (!total) + continue; + + auto available = disk->getAvailableSpace(); + auto unreserved = disk->getUnreservedSpace(); + + new_values[fmt::format("DiskTotal_{}", name)] = { total, + "The total size in bytes of the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; + new_values[fmt::format("DiskUsed_{}", name)] = { total - available, + "Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." }; + new_values[fmt::format("DiskAvailable_{}", name)] = { available, + "Available bytes on the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; + new_values[fmt::format("DiskUnreserved_{}", name)] = { unreserved, + "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems can show a large value like 16 EiB." 
}; + } + } + + { + auto databases = DatabaseCatalog::instance().getDatabases(); + + size_t max_queue_size = 0; + size_t max_inserts_in_queue = 0; + size_t max_merges_in_queue = 0; + + size_t sum_queue_size = 0; + size_t sum_inserts_in_queue = 0; + size_t sum_merges_in_queue = 0; + + size_t max_absolute_delay = 0; + size_t max_relative_delay = 0; + + size_t max_part_count_for_partition = 0; + + size_t number_of_databases = databases.size(); + size_t total_number_of_tables = 0; + + size_t total_number_of_bytes = 0; + size_t total_number_of_rows = 0; + size_t total_number_of_parts = 0; + + for (const auto & db : databases) + { + /// Check if database can contain MergeTree tables + if (!db.second->canContainMergeTreeTables()) + continue; + + for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) + { + ++total_number_of_tables; + const auto & table = iterator->table(); + if (!table) + continue; + + if (MergeTreeData * table_merge_tree = dynamic_cast(table.get())) + { + const auto & settings = getContext()->getSettingsRef(); + + calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountAndSizeForPartition().first); + total_number_of_bytes += table_merge_tree->totalBytes(settings).value(); + total_number_of_rows += table_merge_tree->totalRows(settings).value(); + total_number_of_parts += table_merge_tree->getPartsCount(); + } + + if (StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast(table.get())) + { + StorageReplicatedMergeTree::Status status; + table_replicated_merge_tree->getStatus(status, false); + + calculateMaxAndSum(max_queue_size, sum_queue_size, status.queue.queue_size); + calculateMaxAndSum(max_inserts_in_queue, sum_inserts_in_queue, status.queue.inserts_in_queue); + calculateMaxAndSum(max_merges_in_queue, sum_merges_in_queue, status.queue.merges_in_queue); + + if (!status.is_readonly) + { + try + { + time_t absolute_delay = 0; + time_t relative_delay = 0; + table_replicated_merge_tree->getReplicaDelays(absolute_delay, relative_delay); + + calculateMax(max_absolute_delay, absolute_delay); + calculateMax(max_relative_delay, relative_delay); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__, + "Cannot get replica delay for table: " + backQuoteIfNeed(db.first) + "." + backQuoteIfNeed(iterator->name())); + } + } + } + } + } + + new_values["ReplicasMaxQueueSize"] = { max_queue_size, "Maximum queue size (in the number of operations like get, merge) across Replicated tables." }; + new_values["ReplicasMaxInsertsInQueue"] = { max_inserts_in_queue, "Maximum number of INSERT operations in the queue (still to be replicated) across Replicated tables." }; + new_values["ReplicasMaxMergesInQueue"] = { max_merges_in_queue, "Maximum number of merge operations in the queue (still to be applied) across Replicated tables." }; + + new_values["ReplicasSumQueueSize"] = { sum_queue_size, "Sum queue size (in the number of operations like get, merge) across Replicated tables." }; + new_values["ReplicasSumInsertsInQueue"] = { sum_inserts_in_queue, "Sum of INSERT operations in the queue (still to be replicated) across Replicated tables." }; + new_values["ReplicasSumMergesInQueue"] = { sum_merges_in_queue, "Sum of merge operations in the queue (still to be applied) across Replicated tables." }; + + new_values["ReplicasMaxAbsoluteDelay"] = { max_absolute_delay, "Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. 
A very high value indicates a replica with no data." }; + new_values["ReplicasMaxRelativeDelay"] = { max_relative_delay, "Maximum difference between the replica delay and the delay of the most up-to-date replica of the same table, across Replicated tables." }; + + new_values["MaxPartCountForPartition"] = { max_part_count_for_partition, "Maximum number of parts per partition across all partitions of all tables of MergeTree family. Values larger than 300 indicate misconfiguration, overload, or massive data loading." }; + + new_values["NumberOfDatabases"] = { number_of_databases, "Total number of databases on the server." }; + new_values["NumberOfTables"] = { total_number_of_tables, "Total number of tables summed across the databases on the server, excluding the databases that cannot contain MergeTree tables." + " The excluded database engines are those that generate the set of tables on the fly, like `Lazy`, `MySQL`, `PostgreSQL`, `SQLite`."}; + + new_values["TotalBytesOfMergeTreeTables"] = { total_number_of_bytes, "Total amount of bytes (compressed, including data and indices) stored in all tables of MergeTree family." }; + new_values["TotalRowsOfMergeTreeTables"] = { total_number_of_rows, "Total amount of rows (records) stored in all tables of MergeTree family." }; + new_values["TotalPartsOfMergeTreeTables"] = { total_number_of_parts, "Total amount of data parts in all tables of MergeTree family." + " Numbers larger than 10 000 will negatively affect the server startup time and may indicate an unreasonable choice of the partition key." }; + } + +#if USE_NURAFT + { + auto keeper_dispatcher = getContext()->tryGetKeeperDispatcher(); + if (keeper_dispatcher) + updateKeeperInformation(*keeper_dispatcher, new_values); + } +#endif + + updateHeavyMetricsIfNeeded(current_time, update_time, new_values); +} + +void ServerAsynchronousMetrics::logImpl(AsynchronousMetricValues & new_values) +{ + /// Log the new metrics. + if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog()) + asynchronous_metric_log->addValues(new_values); +} + +void ServerAsynchronousMetrics::updateDetachedPartsStats() +{ + DetachedPartsStats current_values{}; + + for (const auto & db : DatabaseCatalog::instance().getDatabases()) + { + if (!db.second->canContainMergeTreeTables()) + continue; + + for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) + { + const auto & table = iterator->table(); + if (!table) + continue; + + if (MergeTreeData * table_merge_tree = dynamic_cast(table.get())) + { + for (const auto & detached_part: table_merge_tree->getDetachedParts()) + { + if (!detached_part.valid_name) + continue; + + if (detached_part.prefix.empty()) + ++current_values.detached_by_user; + + ++current_values.count; + } + } + } + } + + detached_parts_stats = current_values; +} + +void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_time, TimePoint update_time, AsynchronousMetricValues & new_values) +{ + const auto time_after_previous_update = current_time - heavy_metric_previous_update_time; + const bool update_heavy_metric = time_after_previous_update >= heavy_metric_update_period || first_run; + + if (update_heavy_metric) + { + heavy_metric_previous_update_time = update_time; + + Stopwatch watch; + + /// Test shows that listing 100000 entries takes around 0.15 sec.
+ updateDetachedPartsStats(); + + watch.stop(); + + /// Normally, heavy metrics don't delay the rest of the metrics calculation; + /// otherwise, log a warning message. + auto log_level = std::make_pair(DB::LogsLevel::trace, Poco::Message::PRIO_TRACE); + if (watch.elapsedSeconds() > (update_period.count() / 4. * 3)) + log_level = std::make_pair(DB::LogsLevel::warning, Poco::Message::PRIO_WARNING); + else if (watch.elapsedSeconds() > (update_period.count() / 2.)) + log_level = std::make_pair(DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG); + LOG_IMPL(log, log_level.first, log_level.second, + "Update heavy metrics. " + "Update period {} sec. " + "Update heavy metrics period {} sec. " + "Heavy metrics calculation elapsed: {} sec.", + update_period.count(), + heavy_metric_update_period.count(), + watch.elapsedSeconds()); + + } + + + new_values["NumberOfDetachedParts"] = { detached_parts_stats.count, "The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself if the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed." }; + new_values["NumberOfDetachedByUserParts"] = { detached_parts_stats.detached_by_user, "The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed." }; +} + +} diff --git a/src/Interpreters/ServerAsynchronousMetrics.h b/src/Interpreters/ServerAsynchronousMetrics.h new file mode 100644 index 00000000000..81047e2fdf9 --- /dev/null +++ b/src/Interpreters/ServerAsynchronousMetrics.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class ServerAsynchronousMetrics : public AsynchronousMetrics, WithContext +{ +public: + ServerAsynchronousMetrics( + ContextPtr global_context_, + int update_period_seconds, + int heavy_metrics_update_period_seconds, + const ProtocolServerMetricsFunc & protocol_server_metrics_func_); +private: + void updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) override; + void logImpl(AsynchronousMetricValues & new_values) override; + + const Duration heavy_metric_update_period; + TimePoint heavy_metric_previous_update_time; + + struct DetachedPartsStats + { + size_t count; + size_t detached_by_user; + }; + + DetachedPartsStats detached_parts_stats{}; + + void updateDetachedPartsStats(); + void updateHeavyMetricsIfNeeded(TimePoint current_time, TimePoint update_time, AsynchronousMetricValues & new_values); +}; + +} diff --git a/src/Interpreters/StorageID.cpp b/src/Interpreters/StorageID.cpp index 8811adc087b..70dea02ccc5 100644 --- a/src/Interpreters/StorageID.cpp +++ b/src/Interpreters/StorageID.cpp @@ -64,21 +64,8 @@ String StorageID::getNameForLogs() const + (hasUUID() ?
" (" + toString(uuid) + ")" : ""); } -bool StorageID::operator<(const StorageID & rhs) const -{ - assertNotEmpty(); - /// It's needed for ViewDependencies - if (!hasUUID() && !rhs.hasUUID()) - /// If both IDs don't have UUID, compare them like pair of strings - return std::tie(database_name, table_name) < std::tie(rhs.database_name, rhs.table_name); - else if (hasUUID() && rhs.hasUUID()) - /// If both IDs have UUID, compare UUIDs and ignore database and table name - return uuid < rhs.uuid; - else - /// All IDs without UUID are less, then all IDs with UUID - return !hasUUID(); -} - +/// NOTE: This implementation doesn't allow to implement a good "operator <". +/// Because "a != b" must be equivalent to "(a < b) || (b < a)", and we can't make "operator <" to meet that. bool StorageID::operator==(const StorageID & rhs) const { assertNotEmpty(); diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index 43710988243..68c83f753b5 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -45,6 +45,8 @@ struct StorageID StorageID(const ASTTableIdentifier & table_identifier_node); /// NOLINT StorageID(const ASTPtr & node); /// NOLINT + explicit StorageID(const QualifiedTableName & qualified_name) : StorageID(qualified_name.database, qualified_name.table) { } + String getDatabaseName() const; String getTableName() const; @@ -71,7 +73,6 @@ struct StorageID bool hasDatabase() const { return !database_name.empty(); } - bool operator<(const StorageID & rhs) const; bool operator==(const StorageID & rhs) const; void assertNotEmpty() const @@ -97,8 +98,47 @@ struct StorageID /// Get short, but unique, name. String getShortName() const; + /// Calculates hash using only the database and table name of a StorageID. + struct DatabaseAndTableNameHash + { + size_t operator()(const StorageID & storage_id) const + { + SipHash hash_state; + hash_state.update(storage_id.database_name.data(), storage_id.database_name.size()); + hash_state.update(storage_id.table_name.data(), storage_id.table_name.size()); + return hash_state.get64(); + } + }; + + /// Checks if the database and table name of two StorageIDs are equal. 
+ struct DatabaseAndTableNameEqual + { + bool operator()(const StorageID & left, const StorageID & right) const + { + return (left.database_name == right.database_name) && (left.table_name == right.table_name); + } + }; + private: StorageID() = default; }; } + +namespace fmt +{ + template <> + struct formatter + { + static constexpr auto parse(format_parse_context & ctx) + { + return ctx.begin(); + } + + template + auto format(const DB::StorageID & storage_id, FormatContext & ctx) + { + return format_to(ctx.out(), "{}", storage_id.getNameForLogs()); + } + }; +} diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 41a7fcf8389..050dea02717 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -96,6 +97,12 @@ void TraceCollector::run() Int64 size; readPODBinary(size, in); + ProfileEvents::Event event; + readPODBinary(event, in); + + ProfileEvents::Count increment; + readPODBinary(increment, in); + if (trace_log) { // time and time_in_microseconds are both being constructed from the same timespec so that the @@ -105,7 +112,7 @@ void TraceCollector::run() UInt64 time = static_cast(ts.tv_sec * 1000000000LL + ts.tv_nsec); UInt64 time_in_microseconds = static_cast((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); - TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size}; + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment}; trace_log->add(element); } } diff --git a/src/Interpreters/TraceCollector.h b/src/Interpreters/TraceCollector.h index b3f11ca5756..40fa854b791 100644 --- a/src/Interpreters/TraceCollector.h +++ b/src/Interpreters/TraceCollector.h @@ -1,7 +1,5 @@ #pragma once - #include -#include class StackTrace; @@ -21,11 +19,6 @@ public: explicit TraceCollector(std::shared_ptr trace_log_); ~TraceCollector(); - static inline void collect(TraceType trace_type, const StackTrace & stack_trace, Int64 size) - { - return TraceSender::send(trace_type, stack_trace, size); - } - private: std::shared_ptr trace_log; ThreadFromGlobalPool thread; diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index c16a73e75dc..0408ebe504b 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -20,6 +21,7 @@ const TraceDataType::Values TraceLogElement::trace_values = {"Memory", static_cast(TraceType::Memory)}, {"MemorySample", static_cast(TraceType::MemorySample)}, {"MemoryPeak", static_cast(TraceType::MemoryPeak)}, + {"ProfileEvent", static_cast(TraceType::ProfileEvent)}, }; NamesAndTypesList TraceLogElement::getNamesAndTypes() @@ -36,6 +38,8 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes() {"query_id", std::make_shared()}, {"trace", std::make_shared(std::make_shared())}, {"size", std::make_shared()}, + {"event", std::make_shared(std::make_shared())}, + {"increment", std::make_shared()}, }; } @@ -53,6 +57,13 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); + + String event_name; + if (event != ProfileEvents::end()) + event_name = ProfileEvents::getName(event); + + columns[i++]->insert(event_name); + columns[i++]->insert(increment); } } diff --git 
a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index 43d7861327f..c481f033a72 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -3,8 +3,9 @@ #include #include #include -#include #include +#include +#include #include #include @@ -26,7 +27,12 @@ struct TraceLogElement UInt64 thread_id{}; String query_id{}; Array trace{}; - Int64 size{}; /// Allocation size in bytes for TraceType::Memory + /// Allocation size in bytes for TraceType::Memory. + Int64 size{}; + /// ProfileEvent for TraceType::ProfileEvent. + ProfileEvents::Event event{ProfileEvents::end()}; + /// Increment of profile event for TraceType::ProfileEvent. + ProfileEvents::Count increment{}; static std::string name() { return "TraceLog"; } static NamesAndTypesList getNamesAndTypes(); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c2680e27444..2bd204a0d42 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -417,6 +417,8 @@ static std::tuple executeQueryImpl( throw; } + /// Avoid early destruction of process_list_entry if it was not saved to `res` yet (in case of exception) + ProcessList::EntryPtr process_list_entry; BlockIO res; std::shared_ptr implicit_txn_control{}; String query_database; @@ -509,7 +511,6 @@ static std::tuple executeQueryImpl( checkASTSizeLimits(*ast, settings); /// Put query to process list. But don't put SHOW PROCESSLIST query itself. - ProcessList::EntryPtr process_list_entry; if (!internal && !ast->as()) { /// processlist also has query masked now, to avoid secrets leaks though SHOW PROCESSLIST by other users. diff --git a/src/Interpreters/loadMetadata.h b/src/Interpreters/loadMetadata.h index b229a2b4c31..3553011fe4d 100644 --- a/src/Interpreters/loadMetadata.h +++ b/src/Interpreters/loadMetadata.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace DB diff --git a/src/Parsers/ASTAlterNamedCollectionQuery.cpp b/src/Parsers/ASTAlterNamedCollectionQuery.cpp new file mode 100644 index 00000000000..7e95147ad75 --- /dev/null +++ b/src/Parsers/ASTAlterNamedCollectionQuery.cpp @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +ASTPtr ASTAlterNamedCollectionQuery::clone() const +{ + return std::make_shared(*this); +} + +void ASTAlterNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "Alter NAMED COLLECTION "; + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); + formatOnCluster(settings); + if (!changes.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " SET " << (settings.hilite ? hilite_none : ""); + bool first = true; + for (const auto & change : changes) + { + if (!first) + settings.ostr << ", "; + else + first = false; + + formatSettingName(change.name, settings.ostr); + if (settings.show_secrets) + settings.ostr << " = " << applyVisitor(FieldVisitorToString(), change.value); + else + settings.ostr << " = '[HIDDEN]'"; + } + } + if (!delete_keys.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " DELETE " << (settings.hilite ? 
hilite_none : ""); + bool first = true; + for (const auto & key : delete_keys) + { + if (!first) + settings.ostr << ", "; + else + first = false; + + formatSettingName(key, settings.ostr); + } + } +} + +} diff --git a/src/Parsers/ASTAlterNamedCollectionQuery.h b/src/Parsers/ASTAlterNamedCollectionQuery.h new file mode 100644 index 00000000000..a8aa06200fd --- /dev/null +++ b/src/Parsers/ASTAlterNamedCollectionQuery.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class ASTAlterNamedCollectionQuery : public IAST, public ASTQueryWithOnCluster +{ +public: + std::string collection_name; + SettingsChanges changes; + std::vector delete_keys; + bool if_exists = false; + + String getID(char) const override { return "AlterNamedCollectionQuery"; } + + ASTPtr clone() const override; + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster(clone()); } +}; + +} diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 959fc55c945..80801278963 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -509,7 +509,7 @@ bool ASTAlterQuery::isOneCommandTypeOnly(const ASTAlterCommand::Type & type) con bool ASTAlterQuery::isSettingsAlter() const { - return isOneCommandTypeOnly(ASTAlterCommand::MODIFY_SETTING); + return isOneCommandTypeOnly(ASTAlterCommand::MODIFY_SETTING) || isOneCommandTypeOnly(ASTAlterCommand::RESET_SETTING); } bool ASTAlterQuery::isFreezeAlter() const diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.cpp b/src/Parsers/ASTCreateNamedCollectionQuery.cpp new file mode 100644 index 00000000000..97e83541f05 --- /dev/null +++ b/src/Parsers/ASTCreateNamedCollectionQuery.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +ASTPtr ASTCreateNamedCollectionQuery::clone() const +{ + return std::make_shared(*this); +} + +void ASTCreateNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE NAMED COLLECTION "; + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); + + formatOnCluster(settings); + + settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? 
hilite_none : ""); + bool first = true; + for (const auto & change : changes) + { + if (!first) + settings.ostr << ", "; + else + first = false; + + formatSettingName(change.name, settings.ostr); + + if (settings.show_secrets) + settings.ostr << " = " << applyVisitor(FieldVisitorToString(), change.value); + else + settings.ostr << " = '[HIDDEN]'"; + } +} + +} diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.h b/src/Parsers/ASTCreateNamedCollectionQuery.h new file mode 100644 index 00000000000..901e6b50a4c --- /dev/null +++ b/src/Parsers/ASTCreateNamedCollectionQuery.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class ASTCreateNamedCollectionQuery : public IAST, public ASTQueryWithOnCluster +{ +public: + std::string collection_name; + SettingsChanges changes; + + String getID(char) const override { return "CreateNamedCollectionQuery"; } + + ASTPtr clone() const override; + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster(clone()); } + + std::string getCollectionName() const; +}; + +} diff --git a/src/Parsers/ASTDropNamedCollectionQuery.cpp b/src/Parsers/ASTDropNamedCollectionQuery.cpp new file mode 100644 index 00000000000..3b8568cfd70 --- /dev/null +++ b/src/Parsers/ASTDropNamedCollectionQuery.cpp @@ -0,0 +1,20 @@ +#include +#include +#include + +namespace DB +{ + +ASTPtr ASTDropNamedCollectionQuery::clone() const +{ + return std::make_shared(*this); +} + +void ASTDropNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP NAMED COLLECTION "; + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? 
hilite_none : ""); + formatOnCluster(settings); +} + +} diff --git a/src/Parsers/ASTDropNamedCollectionQuery.h b/src/Parsers/ASTDropNamedCollectionQuery.h new file mode 100644 index 00000000000..0b71bdaf213 --- /dev/null +++ b/src/Parsers/ASTDropNamedCollectionQuery.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class ASTDropNamedCollectionQuery : public IAST, public ASTQueryWithOnCluster +{ +public: + std::string collection_name; + bool if_exists = false; + + String getID(char) const override { return "DropNamedCollectionQuery"; } + + ASTPtr clone() const override; + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; + + ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster(clone()); } +}; + +} diff --git a/src/Parsers/ASTShowTablesQuery.h b/src/Parsers/ASTShowTablesQuery.h index 04cf9d6645a..c3e7f0799d4 100644 --- a/src/Parsers/ASTShowTablesQuery.h +++ b/src/Parsers/ASTShowTablesQuery.h @@ -22,6 +22,7 @@ public: bool changed{false}; bool temporary{false}; bool caches{false}; + bool full{false}; String cluster_str; String from; diff --git a/src/Parsers/ParserAlterNamedCollectionQuery.cpp b/src/Parsers/ParserAlterNamedCollectionQuery.cpp new file mode 100644 index 00000000000..9108747ad82 --- /dev/null +++ b/src/Parsers/ParserAlterNamedCollectionQuery.cpp @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserAlterNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_alter("ALTER"); + ParserKeyword s_collection("NAMED COLLECTION"); + ParserKeyword s_delete("DELETE"); + + ParserIdentifier name_p; + ParserSetQuery set_p; + ParserToken s_comma(TokenType::Comma); + + String cluster_str; + bool if_exists = false; + + ASTPtr collection_name; + ASTPtr set; + std::vector delete_keys; + + if (!s_alter.ignore(pos, expected)) + return false; + + if (!s_collection.ignore(pos, expected)) + return false; + + if (!name_p.parse(pos, collection_name, expected)) + return false; + + if (ParserKeyword{"ON"}.ignore(pos, expected)) + { + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + } + + bool parsed_delete = false; + if (!set_p.parse(pos, set, expected)) + { + if (!s_delete.ignore(pos, expected)) + return false; + + parsed_delete = true; + } + else if (s_delete.ignore(pos, expected)) + { + parsed_delete = true; + } + + if (parsed_delete) + { + while (true) + { + if (!delete_keys.empty() && !s_comma.ignore(pos)) + break; + + ASTPtr key; + if (!name_p.parse(pos, key, expected)) + return false; + + delete_keys.push_back(getIdentifierName(key)); + } + } + + auto query = std::make_shared(); + + query->collection_name = getIdentifierName(collection_name); + query->if_exists = if_exists; + query->cluster = std::move(cluster_str); + if (set) + query->changes = set->as()->changes; + query->delete_keys = delete_keys; + + node = query; + return true; +} + +} diff --git a/src/Parsers/ParserAlterNamedCollectionQuery.h b/src/Parsers/ParserAlterNamedCollectionQuery.h new file mode 100644 index 00000000000..66ad61447dd --- /dev/null +++ b/src/Parsers/ParserAlterNamedCollectionQuery.h @@ -0,0 +1,14 @@ +#pragma once + +#include "IParserBase.h" + +namespace DB +{ + +class ParserAlterNamedCollectionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "Alter NAMED COLLECTION query"; } + 
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 9c1c682ca03..90df8a8f79a 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1383,6 +1384,59 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } +bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_create("CREATE"); + ParserKeyword s_attach("ATTACH"); + ParserKeyword s_named_collection("NAMED COLLECTION"); + ParserKeyword s_as("AS"); + + ParserToken s_comma(TokenType::Comma); + ParserIdentifier name_p; + + ASTPtr collection_name; + String cluster_str; + + if (!s_create.ignore(pos, expected)) + return false; + + if (!s_named_collection.ignore(pos, expected)) + return false; + + if (!name_p.parse(pos, collection_name, expected)) + return false; + + if (ParserKeyword{"ON"}.ignore(pos, expected)) + { + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + } + + if (!s_as.ignore(pos, expected)) + return false; + + SettingsChanges changes; + + while (true) + { + if (!changes.empty() && !s_comma.ignore(pos)) + break; + + changes.push_back(SettingChange{}); + + if (!ParserSetQuery::parseNameValuePair(changes.back(), pos, expected)) + return false; + } + + auto query = std::make_shared(); + + tryGetIdentifierNameInto(collection_name, query->collection_name); + query->changes = changes; + + node = query; + return true; +} + bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_create("CREATE"); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index e1573c92dab..e97033c51f0 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -522,6 +522,13 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserCreateNamedCollectionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "CREATE NAMED COLLECTION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + /** Query like this: * CREATE|ATTACH TABLE [IF NOT EXISTS] [db.]name diff --git a/src/Parsers/ParserDropNamedCollectionQuery.cpp b/src/Parsers/ParserDropNamedCollectionQuery.cpp new file mode 100644 index 00000000000..1ea8aa6d75d --- /dev/null +++ b/src/Parsers/ParserDropNamedCollectionQuery.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserDropNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_drop("DROP"); + ParserKeyword s_collection("NAMED COLLECTION"); + ParserKeyword s_if_exists("IF EXISTS"); + ParserIdentifier name_p; + + String cluster_str; + bool if_exists = false; + + ASTPtr collection_name; + + if (!s_drop.ignore(pos, expected)) + return false; + + if (!s_collection.ignore(pos, expected)) + return false; + + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + + if (!name_p.parse(pos, collection_name, expected)) + return false; + + if (ParserKeyword{"ON"}.ignore(pos, expected)) + { + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + } + + auto query = std::make_shared(); + + tryGetIdentifierNameInto(collection_name, query->collection_name); + query->if_exists 
= if_exists; + query->cluster = std::move(cluster_str); + + node = query; + return true; +} + +} diff --git a/src/Parsers/ParserDropNamedCollectionQuery.h b/src/Parsers/ParserDropNamedCollectionQuery.h new file mode 100644 index 00000000000..5dd3ef63e05 --- /dev/null +++ b/src/Parsers/ParserDropNamedCollectionQuery.h @@ -0,0 +1,14 @@ +#pragma once + +#include "IParserBase.h" + +namespace DB +{ + +class ParserDropNamedCollectionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "DROP NAMED COLLECTION query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/ParserQuery.cpp b/src/Parsers/ParserQuery.cpp index ca837e7dcc5..77e7b58e6b1 100644 --- a/src/Parsers/ParserQuery.cpp +++ b/src/Parsers/ParserQuery.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include #include @@ -46,6 +48,9 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserCreateSettingsProfileQuery create_settings_profile_p; ParserCreateFunctionQuery create_function_p; ParserDropFunctionQuery drop_function_p; + ParserCreateNamedCollectionQuery create_named_collection_p; + ParserDropNamedCollectionQuery drop_named_collection_p; + ParserAlterNamedCollectionQuery alter_named_collection_p; ParserCreateIndexQuery create_index_p; ParserDropIndexQuery drop_index_p; ParserDropAccessEntityQuery drop_access_entity_p; @@ -69,6 +74,9 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || create_settings_profile_p.parse(pos, node, expected) || create_function_p.parse(pos, node, expected) || drop_function_p.parse(pos, node, expected) + || create_named_collection_p.parse(pos, node, expected) + || drop_named_collection_p.parse(pos, node, expected) + || alter_named_collection_p.parse(pos, node, expected) || create_index_p.parse(pos, node, expected) || drop_index_p.parse(pos, node, expected) || drop_access_entity_p.parse(pos, node, expected) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 201cd750af8..107db51f869 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -108,6 +108,13 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } + /// FROM database.table or FROM table or FROM (subquery) or FROM tableFunction(...) + if (s_from.ignore(pos, expected)) + { + if (!ParserTablesInSelectQuery(false).parse(pos, tables, expected)) + return false; + } + /// SELECT [ALL/DISTINCT [ON (expr_list)]] [TOP N [WITH TIES]] expr_list { bool has_all = false; @@ -166,7 +173,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } /// FROM database.table or FROM table or FROM (subquery) or FROM tableFunction(...) 
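The ParserSelectQuery change above lets a query start with its FROM clause (for example, FROM t SELECT a), and the original FROM branch later in the function is now taken only if no tables were parsed yet. The toy parser below is a sketch of that control flow over a deliberately tiny grammar (whitespace tokens, one table, one column token); it is not the real recursive-descent parser, which also threads allow_alias_without_as_keyword through ParserTablesInSelectQuery in the hunks that follow.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Illustrative sketch only: toy grammar  [FROM <table>] SELECT <columns> [FROM <table>]
struct ParsedSelect
{
    std::string table;
    std::string columns;
};

static bool parseSelect(const std::string & query, ParsedSelect & result)
{
    std::istringstream in(query);
    std::vector<std::string> tokens;
    for (std::string token; in >> token;)
        tokens.push_back(token);

    size_t pos = 0;
    // Optional FROM clause before the SELECT list (the newly allowed form).
    if (pos < tokens.size() && tokens[pos] == "FROM")
    {
        if (pos + 1 >= tokens.size())
            return false;
        result.table = tokens[pos + 1];
        pos += 2;
    }
    if (pos >= tokens.size() || tokens[pos] != "SELECT" || pos + 1 >= tokens.size())
        return false;
    result.columns = tokens[pos + 1];
    pos += 2;
    // FROM after the select list, only if tables were not parsed already.
    if (result.table.empty() && pos + 1 < tokens.size() && tokens[pos] == "FROM")
        result.table = tokens[pos + 1];
    return true;
}

int main()
{
    ParsedSelect a, b;
    std::cout << parseSelect("FROM t SELECT x", a) << ' ' << a.table << '\n';  // 1 t
    std::cout << parseSelect("SELECT x FROM t", b) << ' ' << b.table << '\n';  // 1 t
}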
- if (s_from.ignore(pos, expected)) + if (!tables && s_from.ignore(pos, expected)) { if (!ParserTablesInSelectQuery().parse(pos, tables, expected)) return false; diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index 3d47358870d..1647dd9a5b4 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -18,6 +18,7 @@ namespace DB bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_show("SHOW"); + ParserKeyword s_full("FULL"); ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_tables("TABLES"); ParserKeyword s_databases("DATABASES"); @@ -46,6 +47,11 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!s_show.ignore(pos, expected)) return false; + if (s_full.ignore(pos, expected)) + { + query->full = true; + } + if (s_databases.ignore(pos, expected)) { query->databases = true; diff --git a/src/Parsers/ParserShowTablesQuery.h b/src/Parsers/ParserShowTablesQuery.h index 3b8bb033275..1b679c2e85a 100644 --- a/src/Parsers/ParserShowTablesQuery.h +++ b/src/Parsers/ParserShowTablesQuery.h @@ -14,7 +14,7 @@ namespace DB class ParserShowTablesQuery : public IParserBase { protected: - const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; } + const char * getName() const override { return "SHOW [FULL] [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] [I]LIKE 'str'] [LIMIT expr]"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index cff4c959267..2247167c66e 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -21,9 +21,9 @@ bool ParserTableExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { auto res = std::make_shared(); - if (!ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->subquery, expected) - && !ParserWithOptionalAlias(std::make_unique(false, true), true).parse(pos, res->table_function, expected) - && !ParserWithOptionalAlias(std::make_unique(true, true), true) + if (!ParserWithOptionalAlias(std::make_unique(), allow_alias_without_as_keyword).parse(pos, res->subquery, expected) + && !ParserWithOptionalAlias(std::make_unique(false, true), allow_alias_without_as_keyword).parse(pos, res->table_function, expected) + && !ParserWithOptionalAlias(std::make_unique(true, true), allow_alias_without_as_keyword) .parse(pos, res->database_and_table_name, expected)) return false; @@ -126,7 +126,7 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec if (is_first) { - if (!ParserTableExpression().parse(pos, res->table_expression, expected)) + if (!ParserTableExpression(allow_alias_without_as_keyword).parse(pos, res->table_expression, expected)) return false; } else if (ParserArrayJoin().parse(pos, res->array_join, expected)) @@ -200,7 +200,7 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec return false; } - if (!ParserTableExpression().parse(pos, res->table_expression, expected)) + if (!ParserTableExpression(allow_alias_without_as_keyword).parse(pos, res->table_expression, expected)) return false; if (table_join->kind != JoinKind::Comma @@ -261,12 +261,12 @@ bool ParserTablesInSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr child; - if 
(ParserTablesInSelectQueryElement(true).parse(pos, child, expected)) + if (ParserTablesInSelectQueryElement(true, allow_alias_without_as_keyword).parse(pos, child, expected)) res->children.emplace_back(child); else return false; - while (ParserTablesInSelectQueryElement(false).parse(pos, child, expected)) + while (ParserTablesInSelectQueryElement(false, allow_alias_without_as_keyword).parse(pos, child, expected)) res->children.emplace_back(child); node = res; diff --git a/src/Parsers/ParserTablesInSelectQuery.h b/src/Parsers/ParserTablesInSelectQuery.h index 772f1992f4d..428b1482663 100644 --- a/src/Parsers/ParserTablesInSelectQuery.h +++ b/src/Parsers/ParserTablesInSelectQuery.h @@ -12,16 +12,24 @@ struct ASTTableJoin; */ class ParserTablesInSelectQuery : public IParserBase { +public: + explicit ParserTablesInSelectQuery(bool allow_alias_without_as_keyword_ = true) + : allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {} + protected: const char * getName() const override { return "table, table function, subquery or list of joined tables"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + bool allow_alias_without_as_keyword; }; class ParserTablesInSelectQueryElement : public IParserBase { public: - explicit ParserTablesInSelectQueryElement(bool is_first_) : is_first(is_first_) {} + explicit ParserTablesInSelectQueryElement(bool is_first_, bool allow_alias_without_as_keyword_ = true) + : is_first(is_first_), allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {} protected: const char * getName() const override { return "table, table function, subquery or list of joined tables"; } @@ -29,6 +37,7 @@ protected: private: bool is_first; + bool allow_alias_without_as_keyword; static void parseJoinStrictness(Pos & pos, ASTTableJoin & table_join); }; @@ -36,9 +45,16 @@ private: class ParserTableExpression : public IParserBase { +public: + explicit ParserTableExpression(bool allow_alias_without_as_keyword_ = true) + : allow_alias_without_as_keyword(allow_alias_without_as_keyword_) {} + protected: const char * getName() const override { return "table or subquery or table function"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + bool allow_alias_without_as_keyword; }; diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index aec5a578774..e4585ae6a1d 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -455,6 +455,7 @@ void Planner::buildQueryPlanIfNeeded() ); SortDescription group_by_sort_description; + SortDescription sort_description_for_merging; auto merge_threads = settings.max_threads; auto temporary_data_merge_threads = settings.aggregation_memory_efficient_merge_threads @@ -477,7 +478,6 @@ void Planner::buildQueryPlanIfNeeded() const bool should_produce_results_in_order_of_bucket_number = select_query_options.to_stage == QueryProcessingStage::WithMergeableState && settings.distributed_aggregation_memory_efficient; - InputOrderInfoPtr input_order_info; bool aggregate_final = select_query_options.to_stage > QueryProcessingStage::WithMergeableState && !query_node.isGroupByWithTotals() && !query_node.isGroupByWithRollup() && !query_node.isGroupByWithCube(); @@ -493,7 +493,7 @@ void Planner::buildQueryPlanIfNeeded() temporary_data_merge_threads, storage_has_evenly_distributed_read, settings.group_by_use_nulls, - std::move(input_order_info), + std::move(sort_description_for_merging), std::move(group_by_sort_description), should_produce_results_in_order_of_bucket_number, 
settings.enable_memory_bound_merging_of_aggregation_results); diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index 047a55d3f90..a41cf687b39 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -59,7 +59,7 @@ std::vector BinaryFormatReader::readTypes() bool BinaryFormatReader::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/) { - serialization->deserializeBinary(column, *in); + serialization->deserializeBinary(column, *in, format_settings); return true; } @@ -92,7 +92,7 @@ void BinaryFormatReader::skipField(size_t file_column) if (file_column >= read_data_types.size()) throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD, "Cannot skip unknown field in RowBinaryWithNames format, because it's type is unknown"); Field field; - read_data_types[file_column]->getDefaultSerialization()->deserializeBinary(field, *in); + read_data_types[file_column]->getDefaultSerialization()->deserializeBinary(field, *in, format_settings); } BinaryWithNamesAndTypesSchemaReader::BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) diff --git a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp index 60b722569a2..c9ed8e03449 100644 --- a/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp @@ -10,8 +10,8 @@ namespace DB { -BinaryRowOutputFormat::BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, const RowOutputFormatParams & params_) - : IRowOutputFormat(header, out_, params_), with_names(with_names_), with_types(with_types_) +BinaryRowOutputFormat::BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) + : IRowOutputFormat(header, out_, params_), with_names(with_names_), with_types(with_types_), format_settings(format_settings_) { } @@ -44,7 +44,7 @@ void BinaryRowOutputFormat::writePrefix() void BinaryRowOutputFormat::writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) { - serialization.serializeBinary(column, row_num, out); + serialization.serializeBinary(column, row_num, out, format_settings); } @@ -56,9 +56,9 @@ void registerOutputFormatRowBinary(FormatFactory & factory) WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, - const FormatSettings &) + const FormatSettings & format_settings) { - return std::make_shared(buf, sample, with_names, with_types, params); + return std::make_shared(buf, sample, with_names, with_types, params, format_settings); }); factory.markOutputFormatSupportsParallelFormatting(format_name); }; diff --git a/src/Processors/Formats/Impl/BinaryRowOutputFormat.h b/src/Processors/Formats/Impl/BinaryRowOutputFormat.h index 40894608677..e8198cb6ee0 100644 --- a/src/Processors/Formats/Impl/BinaryRowOutputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowOutputFormat.h @@ -17,7 +17,7 @@ class WriteBuffer; class BinaryRowOutputFormat final: public IRowOutputFormat { public: - BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, const RowOutputFormatParams & params_); + BinaryRowOutputFormat(WriteBuffer 
& out_, const Block & header, bool with_names_, bool with_types_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); String getName() const override { return "BinaryRowOutputFormat"; } @@ -29,6 +29,7 @@ private: bool with_names; bool with_types; + const FormatSettings format_settings; }; } diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 8ef547ee8ab..86039342c49 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -18,6 +18,8 @@ #include #include #include +#include +#include namespace DB { @@ -25,21 +27,20 @@ namespace DB static bool memoryBoundMergingWillBeUsed( bool should_produce_results_in_order_of_bucket_number, bool memory_bound_merging_of_aggregation_results_enabled, - InputOrderInfoPtr group_by_info) + SortDescription sort_description_for_merging) { - return should_produce_results_in_order_of_bucket_number && memory_bound_merging_of_aggregation_results_enabled && group_by_info; + return should_produce_results_in_order_of_bucket_number && memory_bound_merging_of_aggregation_results_enabled && !sort_description_for_merging.empty(); } -static ITransformingStep::Traits getTraits(bool should_produce_results_in_order_of_bucket_number, bool memory_bound_merging_will_be_used) +static ITransformingStep::Traits getTraits(bool should_produce_results_in_order_of_bucket_number) { return ITransformingStep::Traits { { .preserves_distinct_columns = false, /// Actually, we may check that distinct names are in aggregation keys - .returns_single_stream = should_produce_results_in_order_of_bucket_number || memory_bound_merging_will_be_used, + .returns_single_stream = should_produce_results_in_order_of_bucket_number, .preserves_number_of_streams = false, .preserves_sorting = false, - .can_enforce_sorting_properties_in_distributed_query = memory_bound_merging_will_be_used, }, { .preserves_number_of_rows = false, @@ -97,17 +98,14 @@ AggregatingStep::AggregatingStep( size_t temporary_data_merge_threads_, bool storage_has_evenly_distributed_read_, bool group_by_use_nulls_, - InputOrderInfoPtr group_by_info_, + SortDescription sort_description_for_merging_, SortDescription group_by_sort_description_, bool should_produce_results_in_order_of_bucket_number_, bool memory_bound_merging_of_aggregation_results_enabled_) : ITransformingStep( input_stream_, appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, grouping_sets_params_, group_by_use_nulls_), - getTraits( - should_produce_results_in_order_of_bucket_number_, - DB::memoryBoundMergingWillBeUsed( - should_produce_results_in_order_of_bucket_number_, memory_bound_merging_of_aggregation_results_enabled_, group_by_info_)), + getTraits(should_produce_results_in_order_of_bucket_number_), false) , params(std::move(params_)) , grouping_sets_params(std::move(grouping_sets_params_)) @@ -118,7 +116,7 @@ AggregatingStep::AggregatingStep( , temporary_data_merge_threads(temporary_data_merge_threads_) , storage_has_evenly_distributed_read(storage_has_evenly_distributed_read_) , group_by_use_nulls(group_by_use_nulls_) - , group_by_info(std::move(group_by_info_)) + , sort_description_for_merging(std::move(sort_description_for_merging_)) , group_by_sort_description(std::move(group_by_sort_description_)) , should_produce_results_in_order_of_bucket_number(should_produce_results_in_order_of_bucket_number_) , 
memory_bound_merging_of_aggregation_results_enabled(memory_bound_merging_of_aggregation_results_enabled_) @@ -130,6 +128,19 @@ AggregatingStep::AggregatingStep( } } +void AggregatingStep::applyOrder(SortDescription sort_description_for_merging_, SortDescription group_by_sort_description_) +{ + sort_description_for_merging = std::move(sort_description_for_merging_); + group_by_sort_description = std::move(group_by_sort_description_); + + if (memoryBoundMergingWillBeUsed()) + { + output_stream->sort_description = group_by_sort_description; + output_stream->sort_scope = DataStream::SortScope::Global; + output_stream->has_single_port = true; + } +} + void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { QueryPipelineProcessorsCollector collector(pipeline, this); @@ -140,7 +151,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B bool allow_to_use_two_level_group_by = pipeline.getNumStreams() > 1 || params.max_bytes_before_external_group_by != 0; /// optimize_aggregation_in_order - if (group_by_info) + if (!sort_description_for_merging.empty()) { /// two-level aggregation is not supported anyway for in order aggregation. allow_to_use_two_level_group_by = false; @@ -320,7 +331,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B return; } - if (group_by_info) + if (!sort_description_for_merging.empty()) { if (pipeline.getNumStreams() > 1) { @@ -340,7 +351,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B /// So, we reduce 'max_bytes' value for aggregation in 'merge_threads' times. return std::make_shared( header, transform_params, - group_by_info, group_by_sort_description, + sort_description_for_merging, group_by_sort_description, max_block_size, aggregation_in_order_max_block_bytes / merge_threads, many_data, counter++); }); @@ -379,7 +390,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B { return std::make_shared( header, transform_params, - group_by_info, group_by_sort_description, + sort_description_for_merging, group_by_sort_description, max_block_size, aggregation_in_order_max_block_bytes); }); @@ -427,11 +438,18 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B void AggregatingStep::describeActions(FormatSettings & settings) const { params.explain(settings.out, settings.offset); + if (!sort_description_for_merging.empty()) + { + String prefix(settings.offset, settings.indent_char); + settings.out << prefix << "Order: " << dumpSortDescription(sort_description_for_merging) << '\n'; + } } void AggregatingStep::describeActions(JSONBuilder::JSONMap & map) const { params.explain(map); + if (!sort_description_for_merging.empty()) + map.add("Order", dumpSortDescription(sort_description_for_merging)); } void AggregatingStep::describePipeline(FormatSettings & settings) const @@ -455,17 +473,10 @@ void AggregatingStep::updateOutputStream() getDataStreamTraits()); } -void AggregatingStep::adjustSettingsToEnforceSortingPropertiesInDistributedQuery(ContextMutablePtr context) const -{ - context->setSetting("enable_memory_bound_merging_of_aggregation_results", true); - context->setSetting("optimize_aggregation_in_order", true); - context->setSetting("force_aggregation_in_order", true); -} - bool AggregatingStep::memoryBoundMergingWillBeUsed() const { return DB::memoryBoundMergingWillBeUsed( - should_produce_results_in_order_of_bucket_number, 
memory_bound_merging_of_aggregation_results_enabled, group_by_info); + should_produce_results_in_order_of_bucket_number, memory_bound_merging_of_aggregation_results_enabled, sort_description_for_merging); } } diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 84c6610e90d..9cb56432797 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -37,7 +37,7 @@ public: size_t temporary_data_merge_threads_, bool storage_has_evenly_distributed_read_, bool group_by_use_nulls_, - InputOrderInfoPtr group_by_info_, + SortDescription sort_description_for_merging_, SortDescription group_by_sort_description_, bool should_produce_results_in_order_of_bucket_number_, bool memory_bound_merging_of_aggregation_results_enabled_); @@ -53,13 +53,14 @@ public: const Aggregator::Params & getParams() const { return params; } - void adjustSettingsToEnforceSortingPropertiesInDistributedQuery(ContextMutablePtr context) const override; + bool inOrder() const { return !sort_description_for_merging.empty(); } + bool isGroupingSets() const { return !grouping_sets_params.empty(); } + void applyOrder(SortDescription sort_description_for_merging_, SortDescription group_by_sort_description_); + bool memoryBoundMergingWillBeUsed() const; private: void updateOutputStream() override; - bool memoryBoundMergingWillBeUsed() const; - Aggregator::Params params; GroupingSetsParamsList grouping_sets_params; bool final; @@ -71,7 +72,11 @@ private: bool storage_has_evenly_distributed_read; bool group_by_use_nulls; - InputOrderInfoPtr group_by_info; + /// Both sort descriptions are needed for the aggregate-in-order optimisation. + /// Both sort descriptions are subsets of the GROUP BY key columns (or monotonic functions over them). + /// The sort description for merging describes the input's sorting and is a prefix of group_by_sort_description. + /// group_by_sort_description contains all GROUP BY keys and is used for final merging of aggregated data. + SortDescription sort_description_for_merging; SortDescription group_by_sort_description; /// These settings are used to determine if we should resize pipeline to 1 at the end. diff --git a/src/Processors/QueryPlan/ITransformingStep.h b/src/Processors/QueryPlan/ITransformingStep.h index a4124dda806..8b16e982af5 100644 --- a/src/Processors/QueryPlan/ITransformingStep.h +++ b/src/Processors/QueryPlan/ITransformingStep.h @@ -34,9 +34,6 @@ public: /// Doesn't change row order. /// Examples: true for FilterStep, false for PartialSortingStep bool preserves_sorting; - - /// See adjustSettingsToEnforceSortingPropertiesInDistributedQuery(). - bool can_enforce_sorting_properties_in_distributed_query = false; }; /// This flags are used by QueryPlan optimizers.
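To make the relationship between the two new `AggregatingStep` members more concrete, here is a minimal sketch (not part of the patch) of how they could be built for a hypothetical query `SELECT a, c, count() FROM tab GROUP BY a, c` over a table ordered by `(a, b)`. The table, column names, and the helper function are illustrative assumptions; only the `SortDescription`/`SortColumnDescription` usage mirrors what the patch itself does.

```cpp
#include <utility>
#include <Core/SortDescription.h>

using namespace DB;

/// Illustrative only: build the two descriptions for
///   CREATE TABLE tab (a Int32, b Int32, c Int32) ENGINE = MergeTree ORDER BY (a, b)
///   SELECT a, c, count() FROM tab GROUP BY a, c
static std::pair<SortDescription, SortDescription> exampleSortDescriptions()
{
    /// Prefix of the table sorting key that is also a GROUP BY key:
    /// the input of AggregatingInOrderTransform is sorted by it.
    SortDescription sort_description_for_merging;
    sort_description_for_merging.emplace_back(SortColumnDescription("a", 1));

    /// All GROUP BY keys: used for the final merge of aggregated data,
    /// so sort_description_for_merging is a prefix of it.
    SortDescription group_by_sort_description = sort_description_for_merging;
    group_by_sort_description.emplace_back(SortColumnDescription("c", 1));

    /// An optimizer pass would then call
    /// AggregatingStep::applyOrder(sort_description_for_merging, group_by_sort_description).
    return {sort_description_for_merging, group_by_sort_description};
}
```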
diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.cpp b/src/Processors/QueryPlan/MergingAggregatedStep.cpp index 10b986579cc..9d172417490 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.cpp +++ b/src/Processors/QueryPlan/MergingAggregatedStep.cpp @@ -19,7 +19,7 @@ static bool memoryBoundMergingWillBeUsed( && input_stream.sort_scope >= DataStream::SortScope::Stream && input_stream.sort_description.hasPrefix(group_by_sort_description); } -static ITransformingStep::Traits getTraits(bool should_produce_results_in_order_of_bucket_number, bool memory_bound_merging_will_be_used) +static ITransformingStep::Traits getTraits(bool should_produce_results_in_order_of_bucket_number) { return ITransformingStep::Traits { @@ -28,7 +28,6 @@ static ITransformingStep::Traits getTraits(bool should_produce_results_in_order_ .returns_single_stream = should_produce_results_in_order_of_bucket_number, .preserves_number_of_streams = false, .preserves_sorting = false, - .can_enforce_sorting_properties_in_distributed_query = memory_bound_merging_will_be_used, }, { .preserves_number_of_rows = false, @@ -51,10 +50,7 @@ MergingAggregatedStep::MergingAggregatedStep( : ITransformingStep( input_stream_, params_.getHeader(input_stream_.header, final_), - getTraits( - should_produce_results_in_order_of_bucket_number_, - DB::memoryBoundMergingWillBeUsed( - input_stream_, memory_bound_merging_of_aggregation_results_enabled_, group_by_sort_description_))) + getTraits(should_produce_results_in_order_of_bucket_number_)) , params(std::move(params_)) , final(final_) , memory_efficient_aggregation(memory_efficient_aggregation_) @@ -77,6 +73,19 @@ MergingAggregatedStep::MergingAggregatedStep( } } +void MergingAggregatedStep::updateInputSortDescription(SortDescription sort_description, DataStream::SortScope sort_scope) +{ + auto & input_stream = input_streams.front(); + input_stream.sort_scope = sort_scope; + input_stream.sort_description = sort_description; + + if (memoryBoundMergingWillBeUsed() && should_produce_results_in_order_of_bucket_number) + { + output_stream->sort_description = group_by_sort_description; + output_stream->sort_scope = DataStream::SortScope::Global; + } +} + void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { auto transform_params = std::make_shared(pipeline.getHeader(), std::move(params), final); @@ -151,11 +160,6 @@ void MergingAggregatedStep::updateOutputStream() output_stream->distinct_columns.insert(key); } -void MergingAggregatedStep::adjustSettingsToEnforceSortingPropertiesInDistributedQuery(ContextMutablePtr context) const -{ - context->setSetting("enable_memory_bound_merging_of_aggregation_results", true); -} - bool MergingAggregatedStep::memoryBoundMergingWillBeUsed() const { return DB::memoryBoundMergingWillBeUsed( diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.h b/src/Processors/QueryPlan/MergingAggregatedStep.h index 24bf6cfdd2b..d65f23cd3c8 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.h +++ b/src/Processors/QueryPlan/MergingAggregatedStep.h @@ -33,12 +33,13 @@ public: void describeActions(JSONBuilder::JSONMap & map) const override; void describeActions(FormatSettings & settings) const override; - void adjustSettingsToEnforceSortingPropertiesInDistributedQuery(ContextMutablePtr context) const override; + void updateInputSortDescription(SortDescription input_sort_description, DataStream::SortScope sort_scope); + + bool memoryBoundMergingWillBeUsed() const; private: void 
updateOutputStream() override; - bool memoryBoundMergingWillBeUsed() const; Aggregator::Params params; bool final; diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 973304b366b..7f435463d64 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -92,6 +92,11 @@ using Stack = std::vector; /// Second pass optimizations void optimizePrimaryKeyCondition(const Stack & stack); void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes); +void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &); + +/// Enable memory bound merging of aggregation states for remote queries +/// in case it was enabled for local plan +void enableMemoryBoundMerging(QueryPlan::Node & node, QueryPlan::Nodes &); } diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 88c0f346e9a..00abd803d2a 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -13,6 +13,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.filter_push_down = from.query_plan_filter_push_down; settings.distinct_in_order = from.optimize_distinct_in_order; settings.read_in_order = from.optimize_read_in_order && from.query_plan_read_in_order; + settings.aggregation_in_order = from.optimize_aggregation_in_order && from.query_plan_aggregation_in_order; return settings; } diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 7185d2fe869..d4989b86b68 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -27,6 +27,9 @@ struct QueryPlanOptimizationSettings /// If read-in-order optimisation is enabled bool read_in_order = true; + /// If aggregation-in-order optimisation is enabled + bool aggregation_in_order = false; + static QueryPlanOptimizationSettings fromSettings(const Settings & from); static QueryPlanOptimizationSettings fromContext(ContextPtr from); }; diff --git a/src/Processors/QueryPlan/Optimizations/enableMemoryBoundMerging.cpp b/src/Processors/QueryPlan/Optimizations/enableMemoryBoundMerging.cpp new file mode 100644 index 00000000000..9be34378c6e --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/enableMemoryBoundMerging.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include + +namespace DB::QueryPlanOptimizations +{ + +/// We are trying to find a part of plan like +/// +/// - ReadFromRemote (x N) +/// - Union - ReadFromParallelRemoteReplicasStep (x M) +/// - Aggregating/MergingAggregated +/// +/// and enable memory bound merging for remote steps if it was enabled for local aggregation. 
+void enableMemoryBoundMerging(QueryPlan::Node & node, QueryPlan::Nodes &) +{ + auto * root_merging_aggregated = typeid_cast(node.step.get()); + if (!root_merging_aggregated) + return; + + const auto & union_node = *node.children.front(); + auto * union_step = typeid_cast(union_node.step.get()); + if (!union_step) + return; + + std::vector reading_steps; + std::vector async_reading_steps; + IQueryPlanStep * local_plan = nullptr; + + reading_steps.reserve(union_node.children.size()); + async_reading_steps.reserve(union_node.children.size()); + + for (const auto & child : union_node.children) + { + auto * child_node = child->step.get(); + if (auto * reading_step = typeid_cast(child_node)) + reading_steps.push_back(reading_step); + else if (auto * async_reading_step = typeid_cast(child_node)) + async_reading_steps.push_back(async_reading_step); + else if (local_plan) + /// Usually there is a single local plan. + /// TODO: we can support many local plans and calculate common sort description prefix. Do we need it? + return; + else + local_plan = child_node; + } + + /// We determine output stream sort properties by a local plan (local because otherwise the table could be unknown). + /// If no local shard exists for this cluster, no sort properties will be provided, c'est la vie. + if (local_plan == nullptr || (reading_steps.empty() && async_reading_steps.empty())) + return; + + SortDescription sort_description; + bool enforce_aggregation_in_order = false; + + if (auto * aggregating_step = typeid_cast(local_plan)) + { + if (aggregating_step->memoryBoundMergingWillBeUsed()) + { + sort_description = aggregating_step->getOutputStream().sort_description; + enforce_aggregation_in_order = true; + } + } + else if (auto * merging_aggregated = typeid_cast(local_plan)) + { + if (merging_aggregated->memoryBoundMergingWillBeUsed()) + { + sort_description = merging_aggregated->getOutputStream().sort_description; + } + } + + if (sort_description.empty()) + return; + + for (auto & reading : reading_steps) + { + reading->enforceSorting(sort_description); + if (enforce_aggregation_in_order) + reading->enforceAggregationInOrder(); + } + + for (auto & reading : async_reading_steps) + { + reading->enforceSorting(sort_description); + if (enforce_aggregation_in_order) + reading->enforceAggregationInOrder(); + } + + root_merging_aggregated->updateInputSortDescription(sort_description, DataStream::SortScope::Stream); +} + +} diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp index ac131a1b67c..bdf8f24f9d6 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp @@ -679,6 +679,210 @@ InputOrderInfoPtr buildInputOrderInfo( return std::make_shared(order_key_prefix_descr, next_sort_key, read_direction, limit); } +/// We really need three different sort descriptions here.
+/// For example: +/// +/// create table tab (a Int32, b Int32, c Int32, d Int32) engine = MergeTree order by (a, b, c); +/// select a, any(b), c, d from tab where b = 1 group by a, c, d order by c, d; +/// +/// We would like to have: +/// (a, b, c) - a sort description for reading from table (it goes into input_order) +/// (a, c) - a sort description for merging (an input of AggregatingInOrderTransform is sorted by these GROUP BY keys) +/// (a, c, d) - a group by sort description (an input of FinishAggregatingInOrderTransform is sorted by all GROUP BY keys) +/// +/// Sort description from input_order is not actually used. ReadFromMergeTree reads only PK prefix size. +/// We should remove it later. +struct AggregationInputOrder +{ + InputOrderInfoPtr input_order; + SortDescription sort_description_for_merging; + SortDescription group_by_sort_description; +}; + +AggregationInputOrder buildInputOrderInfo( + const FixedColumns & fixed_columns, + const ActionsDAGPtr & dag, + const Names & group_by_keys, + const ActionsDAG & sorting_key_dag, + const Names & sorting_key_columns) +{ + MatchedTrees::Matches matches; + FixedColumns fixed_key_columns; + + /// For every column in PK find any match from GROUP BY key. + using ReverseMatches = std::unordered_map; + ReverseMatches reverse_matches; + + if (dag) + { + matches = matchTrees(sorting_key_dag, *dag); + + for (const auto & [node, match] : matches) + { + if (!match.monotonicity || match.monotonicity->strict) + { + if (match.node && fixed_columns.contains(node)) + fixed_key_columns.insert(match.node); + } + } + + enreachFixedColumns(sorting_key_dag, fixed_key_columns); + + for (auto it = matches.cbegin(); it != matches.cend(); ++it) + { + const MatchedTrees::Match * match = &it->second; + if (match->node) + { + auto [jt, inserted] = reverse_matches.emplace(match->node, it); + if (!inserted) + { + /// Find the best match for PK node. + /// Direct match > strict monotonic > monotonic. + const MatchedTrees::Match * prev_match = &jt->second->second; + bool is_better = prev_match->monotonicity && !match->monotonicity; + if (!is_better) + { + bool both_monotonic = prev_match->monotonicity && match->monotonicity; + is_better = both_monotonic && match->monotonicity->strict && !prev_match->monotonicity->strict; + } + + if (is_better) + jt->second = it; + } + } + } + } + + /// This is a result direction we will read from MergeTree + /// 1 - in order, + /// -1 - in reverse order, + /// 0 - usual read, don't apply optimization + /// + /// So far, 0 means any direction is possible. It is ok for constant prefix. + int read_direction = 0; + size_t next_sort_key = 0; + std::unordered_set not_matched_group_by_keys(group_by_keys.begin(), group_by_keys.end()); + + SortDescription group_by_sort_description; + group_by_sort_description.reserve(group_by_keys.size()); + + SortDescription order_key_prefix_descr; + order_key_prefix_descr.reserve(sorting_key_columns.size()); + + while (!not_matched_group_by_keys.empty() && next_sort_key < sorting_key_columns.size()) + { + const auto & sorting_key_column = sorting_key_columns[next_sort_key]; + + /// Direction for current sort key. + int current_direction = 0; + bool strict_monotonic = true; + std::unordered_set::iterator group_by_key_it; + + const ActionsDAG::Node * sort_column_node = sorting_key_dag.tryFindInOutputs(sorting_key_column); + /// This should not happen. + if (!sort_column_node) + break; + + if (!dag) + { + /// This is possible if there were no Expression or Filter steps in Plan.
+ /// Example: SELECT * FROM tab ORDER BY a, b + + if (sort_column_node->type != ActionsDAG::ActionType::INPUT) + break; + + group_by_key_it = not_matched_group_by_keys.find(sorting_key_column); + if (group_by_key_it == not_matched_group_by_keys.end()) + break; + + current_direction = 1; + + //std::cerr << "====== (no dag) Found direct match" << std::endl; + ++next_sort_key; + } + else + { + const MatchedTrees::Match * match = nullptr; + const ActionsDAG::Node * group_by_key_node = nullptr; + if (const auto match_it = reverse_matches.find(sort_column_node); match_it != reverse_matches.end()) + { + group_by_key_node = match_it->second->first; + match = &match_it->second->second; + } + + //std::cerr << "====== Finding match for " << sort_column_node->result_name << ' ' << static_cast(sort_column_node) << std::endl; + + if (match && match->node) + group_by_key_it = not_matched_group_by_keys.find(group_by_key_node->result_name); + + if (match && match->node && group_by_key_it != not_matched_group_by_keys.end()) + { + //std::cerr << "====== Found direct match" << std::endl; + + current_direction = 1; + if (match->monotonicity) + { + current_direction *= match->monotonicity->direction; + strict_monotonic = match->monotonicity->strict; + } + + ++next_sort_key; + } + else if (fixed_key_columns.contains(sort_column_node)) + { + //std::cerr << "+++++++++ Found fixed key by match" << std::endl; + ++next_sort_key; + } + else + break; + } + + /// read_direction == 0 means we can choose any global direction. + /// current_direction == 0 means the current key is fixed and any direction is possible for it. + if (current_direction && read_direction && current_direction != read_direction) + break; + + if (read_direction == 0 && current_direction != 0) + read_direction = current_direction; + + if (current_direction) + { + /// Aggregation in order will always read in table order. + /// Here, current_direction is a direction which will be applied to every key. + /// Example: + /// CREATE TABLE t (x, y, z) ENGINE = MergeTree ORDER BY (x, y) + /// SELECT ... FROM t GROUP BY negate(y), negate(x), z + /// Here, current_direction will be -1 because negate() is negatively monotonic, + /// Prefix sort description for reading will be (negate(y) DESC, negate(x) DESC), + /// Sort description for GROUP BY will be (negate(y) DESC, negate(x) DESC, z). + //std::cerr << "---- adding " << std::string(*group_by_key_it) << std::endl; + group_by_sort_description.emplace_back(SortColumnDescription(std::string(*group_by_key_it), current_direction)); + order_key_prefix_descr.emplace_back(SortColumnDescription(std::string(*group_by_key_it), current_direction)); + not_matched_group_by_keys.erase(group_by_key_it); + } + else + { + /// If the column is fixed, we will read it in table order as well.
+ //std::cerr << "---- adding " << sorting_key_column << std::endl; + order_key_prefix_descr.emplace_back(SortColumnDescription(sorting_key_column, 1)); + } + + if (current_direction && !strict_monotonic) + break; + } + + if (read_direction == 0 || group_by_sort_description.empty()) + return {}; + + SortDescription sort_description_for_merging = group_by_sort_description; + + for (const auto & key : not_matched_group_by_keys) + group_by_sort_description.emplace_back(SortColumnDescription(std::string(key))); + + auto input_order = std::make_shared(order_key_prefix_descr, next_sort_key, /*read_direction*/ 1, /* limit */ 0); + return { std::move(input_order), std::move(sort_description_for_merging), std::move(group_by_sort_description) }; +} + InputOrderInfoPtr buildInputOrderInfo( ReadFromMergeTree * reading, const FixedColumns & fixed_columns, @@ -733,6 +937,56 @@ InputOrderInfoPtr buildInputOrderInfo( return order_info; } +AggregationInputOrder buildInputOrderInfo( + ReadFromMergeTree * reading, + const FixedColumns & fixed_columns, + const ActionsDAGPtr & dag, + const Names & group_by_keys) +{ + const auto & sorting_key = reading->getStorageMetadata()->getSortingKey(); + const auto & sorting_key_columns = sorting_key.column_names; + + return buildInputOrderInfo( + fixed_columns, + dag, group_by_keys, + sorting_key.expression->getActionsDAG(), sorting_key_columns); +} + +AggregationInputOrder buildInputOrderInfo( + ReadFromMerge * merge, + const FixedColumns & fixed_columns, + const ActionsDAGPtr & dag, + const Names & group_by_keys) +{ + const auto & tables = merge->getSelectedTables(); + + AggregationInputOrder order_info; + for (const auto & table : tables) + { + auto storage = std::get(table); + const auto & sorting_key = storage->getInMemoryMetadataPtr()->getSortingKey(); + const auto & sorting_key_columns = sorting_key.column_names; + + if (sorting_key_columns.empty()) + return {}; + + auto table_order_info = buildInputOrderInfo( + fixed_columns, + dag, group_by_keys, + sorting_key.expression->getActionsDAG(), sorting_key_columns); + + if (!table_order_info.input_order) + return {}; + + if (!order_info.input_order) + order_info = table_order_info; + else if (*order_info.input_order != *table_order_info.input_order) + return {}; + } + + return order_info; +} + InputOrderInfoPtr buildInputOrderInfo(SortingStep & sorting, QueryPlan::Node & node) { QueryPlan::Node * reading_node = findReadingStep(node); @@ -781,6 +1035,53 @@ InputOrderInfoPtr buildInputOrderInfo(SortingStep & sorting, QueryPlan::Node & n return nullptr; } +AggregationInputOrder buildInputOrderInfo(AggregatingStep & aggregating, QueryPlan::Node & node) +{ + QueryPlan::Node * reading_node = findReadingStep(node); + if (!reading_node) + return {}; + + const auto & keys = aggregating.getParams().keys; + size_t limit = 0; + + ActionsDAGPtr dag; + FixedColumns fixed_columns; + buildSortingDAG(node, dag, fixed_columns, limit); + + if (dag && !fixed_columns.empty()) + enreachFixedColumns(*dag, fixed_columns); + + if (auto * reading = typeid_cast(reading_node->step.get())) + { + auto order_info = buildInputOrderInfo( + reading, + fixed_columns, + dag, keys); + + if (order_info.input_order) + reading->requestReadingInOrder( + order_info.input_order->used_prefix_of_sorting_key_size, + order_info.input_order->direction, + order_info.input_order->limit); + + return order_info; + } + else if (auto * merge = typeid_cast(reading_node->step.get())) + { + auto order_info = buildInputOrderInfo( + merge, + fixed_columns, + dag, keys); + + 
if (order_info.input_order) + merge->requestReadingInOrder(order_info.input_order); + + return order_info; + } + + return {}; +} + void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { if (node.children.size() != 1) @@ -860,6 +1161,25 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes) } } +void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &) +{ + if (node.children.size() != 1) + return; + + auto * aggregating = typeid_cast(node.step.get()); + if (!aggregating) + return; + + if (aggregating->inOrder() || aggregating->isGroupingSets()) + return; + + /// TODO: maybe add support for UNION later. + if (auto order_info = buildInputOrderInfo(*aggregating, *node.children.front()); order_info.input_order) + { + aggregating->applyOrder(std::move(order_info.sort_description_for_merging), std::move(order_info.group_by_sort_description)); + } +} + /// This optimisation is obsolete and will be removed. /// optimizeReadInOrder covers it. size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, QueryPlan::Nodes & /*nodes*/) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index f514e145a92..13095dfad47 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include namespace DB @@ -112,6 +114,9 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.read_in_order) optimizeReadInOrder(*frame.node, nodes); + if (optimization_settings.aggregation_in_order) + optimizeAggregationInOrder(*frame.node, nodes); + if (optimization_settings.distinct_in_order) tryDistinctReadInOrder(frame.node); } @@ -126,6 +131,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s } optimizePrimaryKeyCondition(stack); + enableMemoryBoundMerging(*frame.node, nodes); stack.pop_back(); } diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 81f2fa4b65f..f23e99e1bcc 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -25,6 +25,7 @@ namespace DB namespace ErrorCodes { extern const int ALL_CONNECTION_TRIES_FAILED; + extern const int LOGICAL_ERROR; } static void addConvertingActions(Pipe & pipe, const Block & header) @@ -51,6 +52,32 @@ static void addConvertingActions(Pipe & pipe, const Block & header) }); } +static void enforceSorting(QueryProcessingStage::Enum stage, DataStream & output_stream, Context & context, SortDescription output_sort_description) +{ + if (stage != QueryProcessingStage::WithMergeableState) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot enforce sorting for ReadFromRemote step up to stage {}", + QueryProcessingStage::toString(stage)); + + context.setSetting("enable_memory_bound_merging_of_aggregation_results", true); + + output_stream.sort_description = std::move(output_sort_description); + output_stream.sort_scope = DataStream::SortScope::Stream; +} + +static void enforceAggregationInOrder(QueryProcessingStage::Enum stage, Context & context) +{ + if (stage != QueryProcessingStage::WithMergeableState) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot enforce aggregation in order for ReadFromRemote step up to stage {}", + QueryProcessingStage::toString(stage)); + + 
context.setSetting("optimize_aggregation_in_order", true); + context.setSetting("force_aggregation_in_order", true); +} + static String formattedAST(const ASTPtr & ast) { if (!ast) @@ -70,15 +97,13 @@ ReadFromRemote::ReadFromRemote( QueryProcessingStage::Enum stage_, StorageID main_table_, ASTPtr table_func_ptr_, - ContextPtr context_, + ContextMutablePtr context_, ThrottlerPtr throttler_, Scalars scalars_, Tables external_tables_, Poco::Logger * log_, UInt32 shard_count_, - std::shared_ptr storage_limits_, - SortDescription output_sort_description_, - DataStream::SortScope output_sort_scope_) + std::shared_ptr storage_limits_) : ISourceStep(DataStream{.header = std::move(header_)}) , shards(std::move(shards_)) , stage(stage_) @@ -92,8 +117,16 @@ ReadFromRemote::ReadFromRemote( , log(log_) , shard_count(shard_count_) { - output_stream->sort_description = std::move(output_sort_description_); - output_stream->sort_scope = output_sort_scope_; +} + +void ReadFromRemote::enforceSorting(SortDescription output_sort_description) +{ + DB::enforceSorting(stage, *output_stream, *context, output_sort_description); +} + +void ReadFromRemote::enforceAggregationInOrder() +{ + DB::enforceAggregationInOrder(stage, *context); } void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard) @@ -238,14 +271,12 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( QueryProcessingStage::Enum stage_, StorageID main_table_, ASTPtr table_func_ptr_, - ContextPtr context_, + ContextMutablePtr context_, ThrottlerPtr throttler_, Scalars scalars_, Tables external_tables_, Poco::Logger * log_, - std::shared_ptr storage_limits_, - SortDescription output_sort_description_, - DataStream::SortScope output_sort_scope_) + std::shared_ptr storage_limits_) : ISourceStep(DataStream{.header = std::move(header_)}) , coordinator(std::move(coordinator_)) , shard(std::move(shard_)) @@ -266,11 +297,17 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( description.push_back(fmt::format("Replica: {}", address.host_name)); setStepDescription(boost::algorithm::join(description, ", ")); - - output_stream->sort_description = std::move(output_sort_description_); - output_stream->sort_scope = output_sort_scope_; } +void ReadFromParallelRemoteReplicasStep::enforceSorting(SortDescription output_sort_description) +{ + DB::enforceSorting(stage, *output_stream, *context, output_sort_description); +} + +void ReadFromParallelRemoteReplicasStep::enforceAggregationInOrder() +{ + DB::enforceAggregationInOrder(stage, *context); +} void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index 7c8bbddfe79..60a7cd90f3f 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -27,34 +27,29 @@ public: QueryProcessingStage::Enum stage_, StorageID main_table_, ASTPtr table_func_ptr_, - ContextPtr context_, + ContextMutablePtr context_, ThrottlerPtr throttler_, Scalars scalars_, Tables external_tables_, Poco::Logger * log_, UInt32 shard_count_, - std::shared_ptr storage_limits_, - SortDescription output_sort_description_, - DataStream::SortScope output_sort_scope_); + std::shared_ptr storage_limits_); String getName() const override { return "ReadFromRemote"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) 
override; -private: - enum class Mode - { - PerReplica, - PerShard - }; + void enforceSorting(SortDescription output_sort_description); + void enforceAggregationInOrder(); +private: ClusterProxy::SelectStreamFactory::Shards shards; QueryProcessingStage::Enum stage; StorageID main_table; ASTPtr table_func_ptr; - ContextPtr context; + ContextMutablePtr context; ThrottlerPtr throttler; Scalars scalars; @@ -80,19 +75,20 @@ public: QueryProcessingStage::Enum stage_, StorageID main_table_, ASTPtr table_func_ptr_, - ContextPtr context_, + ContextMutablePtr context_, ThrottlerPtr throttler_, Scalars scalars_, Tables external_tables_, Poco::Logger * log_, - std::shared_ptr storage_limits_, - SortDescription output_sort_description_, - DataStream::SortScope output_sort_scope_); + std::shared_ptr storage_limits_); String getName() const override { return "ReadFromRemoteParallelReplicas"; } void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + void enforceSorting(SortDescription output_sort_description); + void enforceAggregationInOrder(); + private: void addPipeForSingeReplica(Pipes & pipes, std::shared_ptr pool, IConnections::ReplicaInfo replica_info); @@ -104,7 +100,7 @@ private: StorageID main_table; ASTPtr table_func_ptr; - ContextPtr context; + ContextMutablePtr context; ThrottlerPtr throttler; Scalars scalars; diff --git a/src/Processors/QueryPlan/UnionStep.cpp b/src/Processors/QueryPlan/UnionStep.cpp index 6c990c5fd0b..6290c7417db 100644 --- a/src/Processors/QueryPlan/UnionStep.cpp +++ b/src/Processors/QueryPlan/UnionStep.cpp @@ -37,6 +37,11 @@ UnionStep::UnionStep(DataStreams input_streams_, size_t max_threads_) else output_stream = DataStream{.header = header}; + updateOutputSortDescription(); +} + +void UnionStep::updateOutputSortDescription() +{ SortDescription common_sort_description = input_streams.front().sort_description; DataStream::SortScope sort_scope = input_streams.front().sort_scope; for (const auto & input_stream : input_streams) diff --git a/src/Processors/QueryPlan/UnionStep.h b/src/Processors/QueryPlan/UnionStep.h index c23223bc6fa..a5a2f6b356e 100644 --- a/src/Processors/QueryPlan/UnionStep.h +++ b/src/Processors/QueryPlan/UnionStep.h @@ -19,6 +19,8 @@ public: size_t getMaxThreads() const { return max_threads; } + void updateOutputSortDescription(); + private: Block header; size_t max_threads; diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 4664dcae8dd..95e9b953840 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -11,11 +11,11 @@ namespace DB AggregatingInOrderTransform::AggregatingInOrderTransform( Block header, AggregatingTransformParamsPtr params_, - InputOrderInfoPtr group_by_info_, + const SortDescription & sort_description_for_merging, const SortDescription & group_by_description_, size_t max_block_size_, size_t max_block_bytes_) : AggregatingInOrderTransform(std::move(header), std::move(params_), - group_by_info_, group_by_description_, + sort_description_for_merging, group_by_description_, max_block_size_, max_block_bytes_, std::make_unique(1), 0) { @@ -23,7 +23,7 @@ AggregatingInOrderTransform::AggregatingInOrderTransform( AggregatingInOrderTransform::AggregatingInOrderTransform( Block header, AggregatingTransformParamsPtr params_, - InputOrderInfoPtr group_by_info_, + const SortDescription & sort_description_for_merging, const 
SortDescription & group_by_description_, size_t max_block_size_, size_t max_block_bytes_, ManyAggregatedDataPtr many_data_, size_t current_variant) @@ -32,7 +32,6 @@ AggregatingInOrderTransform::AggregatingInOrderTransform( , max_block_bytes(max_block_bytes_) , params(std::move(params_)) , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) - , group_by_info(group_by_info_) , sort_description(group_by_description_) , aggregate_columns(params->params.aggregates_size) , many_data(std::move(many_data_)) @@ -41,13 +40,13 @@ AggregatingInOrderTransform::AggregatingInOrderTransform( /// We won't finalize states in order to merge same states (generated due to multi-thread execution) in AggregatingSortedTransform res_header = params->getCustomHeader(/* final_= */ false); - for (size_t i = 0; i < group_by_info->sort_description_for_merging.size(); ++i) + for (size_t i = 0; i < sort_description_for_merging.size(); ++i) { const auto & column_description = group_by_description_[i]; group_by_description.emplace_back(column_description, res_header.getPositionByName(column_description.column_name)); } - if (group_by_info->sort_description_for_merging.size() < group_by_description_.size()) + if (sort_description_for_merging.size() < group_by_description_.size()) { group_by_key = true; /// group_by_description may contains duplicates, so we use keys_size from Aggregator::params diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index ee9ab0f4b79..af63ac61c3c 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -23,13 +23,13 @@ class AggregatingInOrderTransform : public IProcessor { public: AggregatingInOrderTransform(Block header, AggregatingTransformParamsPtr params, - InputOrderInfoPtr group_by_info_, + const SortDescription & sort_description_for_merging, const SortDescription & group_by_description_, size_t max_block_size_, size_t max_block_bytes_, ManyAggregatedDataPtr many_data, size_t current_variant); AggregatingInOrderTransform(Block header, AggregatingTransformParamsPtr params, - InputOrderInfoPtr group_by_info_, + const SortDescription & sort_description_for_merging, const SortDescription & group_by_description_, size_t max_block_size_, size_t max_block_bytes_); @@ -58,7 +58,6 @@ private: AggregatingTransformParamsPtr params; ColumnsMask aggregates_mask; - InputOrderInfoPtr group_by_info; /// For sortBlock() SortDescription sort_description; SortDescriptionWithPositions group_by_description; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index cc484855e76..085399e4941 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -225,13 +225,13 @@ Chain buildPushingToViewsChain( disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); auto table_id = storage->getStorageID(); - Dependencies dependencies = DatabaseCatalog::instance().getDependencies(table_id); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); /// We need special context for materialized views insertions ContextMutablePtr select_context; ContextMutablePtr insert_context; ViewsDataPtr views_data; - if (!dependencies.empty()) + if (!views.empty()) { select_context = Context::createCopy(context); insert_context = Context::createCopy(context); @@ 
-253,10 +253,10 @@ Chain buildPushingToViewsChain( std::vector chains; - for (const auto & database_table : dependencies) + for (const auto & view_id : views) { - auto dependent_table = DatabaseCatalog::instance().getTable(database_table, context); - auto dependent_metadata_snapshot = dependent_table->getInMemoryMetadataPtr(); + auto view = DatabaseCatalog::instance().getTable(view_id, context); + auto view_metadata_snapshot = view->getInMemoryMetadataPtr(); ASTPtr query; Chain out; @@ -288,7 +288,7 @@ Chain buildPushingToViewsChain( views_data->thread_status_holder->thread_statuses.push_front(std::move(view_thread_status_ptr)); auto runtime_stats = std::make_unique(); - runtime_stats->target_name = database_table.getFullTableName(); + runtime_stats->target_name = view_id.getFullTableName(); runtime_stats->thread_status = view_thread_status; runtime_stats->event_time = std::chrono::system_clock::now(); runtime_stats->event_status = QueryViewsLogElement::ViewStatus::EXCEPTION_BEFORE_START; @@ -297,7 +297,7 @@ Chain buildPushingToViewsChain( auto & target_name = runtime_stats->target_name; auto * view_counter_ms = &runtime_stats->elapsed_ms; - if (auto * materialized_view = dynamic_cast(dependent_table.get())) + if (auto * materialized_view = dynamic_cast(view.get())) { type = QueryViewsLogElement::ViewType::MATERIALIZED; result_chain.addTableLock(materialized_view->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); @@ -305,7 +305,7 @@ Chain buildPushingToViewsChain( StoragePtr inner_table = materialized_view->getTargetTable(); auto inner_table_id = inner_table->getStorageID(); auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr(); - query = dependent_metadata_snapshot->getSelectQuery().inner_query; + query = view_metadata_snapshot->getSelectQuery().inner_query; target_name = inner_table_id.getFullTableName(); /// Get list of columns we get from select query. 
@@ -324,31 +324,31 @@ Chain buildPushingToViewsChain( InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms); - out.addStorageHolder(dependent_table); + out.addStorageHolder(view); out.addStorageHolder(inner_table); } - else if (auto * live_view = dynamic_cast(dependent_table.get())) + else if (auto * live_view = dynamic_cast(view.get())) { runtime_stats->type = QueryViewsLogElement::ViewType::LIVE; query = live_view->getInnerQuery(); // Used only to log in system.query_views_log out = buildPushingToViewsChain( - dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), true, thread_status_holder, view_counter_ms, storage_header); + view, view_metadata_snapshot, insert_context, ASTPtr(), true, thread_status_holder, view_counter_ms, storage_header); } - else if (auto * window_view = dynamic_cast(dependent_table.get())) + else if (auto * window_view = dynamic_cast(view.get())) { runtime_stats->type = QueryViewsLogElement::ViewType::WINDOW; query = window_view->getMergeableQuery(); // Used only to log in system.query_views_log out = buildPushingToViewsChain( - dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), true, thread_status_holder, view_counter_ms); + view, view_metadata_snapshot, insert_context, ASTPtr(), true, thread_status_holder, view_counter_ms); } else out = buildPushingToViewsChain( - dependent_table, dependent_metadata_snapshot, insert_context, ASTPtr(), false, thread_status_holder, view_counter_ms); + view, view_metadata_snapshot, insert_context, ASTPtr(), false, thread_status_holder, view_counter_ms); views_data->views.emplace_back(ViewRuntimeData{ //-V614 std::move(query), out.getInputHeader(), - database_table, + view_id, nullptr, std::move(runtime_stats)}); @@ -367,7 +367,7 @@ Chain buildPushingToViewsChain( if (!no_destination) { context->getQueryContext()->addQueryAccessInfo( - backQuoteIfNeed(database_table.getDatabaseName()), views_data->views.back().runtime_stats->target_name, {}, "", database_table.getFullTableName()); + backQuoteIfNeed(view_id.getDatabaseName()), views_data->views.back().runtime_stats->target_name, {}, "", view_id.getFullTableName()); } } diff --git a/src/Server/HTTP/HTTPContext.h b/src/Server/HTTP/HTTPContext.h new file mode 100644 index 00000000000..09c46ed188c --- /dev/null +++ b/src/Server/HTTP/HTTPContext.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +namespace DB +{ + +struct IHTTPContext +{ + virtual uint64_t getMaxHstsAge() const = 0; + virtual uint64_t getMaxUriSize() const = 0; + virtual uint64_t getMaxFields() const = 0; + virtual uint64_t getMaxFieldNameSize() const = 0; + virtual uint64_t getMaxFieldValueSize() const = 0; + virtual uint64_t getMaxChunkSize() const = 0; + virtual Poco::Timespan getReceiveTimeout() const = 0; + virtual Poco::Timespan getSendTimeout() const = 0; + + virtual ~IHTTPContext() = default; +}; + +using HTTPContextPtr = std::shared_ptr; + +} diff --git a/src/Server/HTTP/HTTPServer.cpp b/src/Server/HTTP/HTTPServer.cpp index 2e91fad1c0f..46734933263 100644 --- a/src/Server/HTTP/HTTPServer.cpp +++ b/src/Server/HTTP/HTTPServer.cpp @@ -6,7 +6,7 @@ namespace DB { HTTPServer::HTTPServer( - ContextPtr context, + HTTPContextPtr context, HTTPRequestHandlerFactoryPtr factory_, Poco::ThreadPool & thread_pool, Poco::Net::ServerSocket & socket_, diff --git a/src/Server/HTTP/HTTPServer.h b/src/Server/HTTP/HTTPServer.h index 
07ad54d267f..adfb21e7c62 100644 --- a/src/Server/HTTP/HTTPServer.h +++ b/src/Server/HTTP/HTTPServer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -11,13 +12,11 @@ namespace DB { -class Context; - class HTTPServer : public TCPServer { public: explicit HTTPServer( - ContextPtr context, + HTTPContextPtr context, HTTPRequestHandlerFactoryPtr factory, Poco::ThreadPool & thread_pool, Poco::Net::ServerSocket & socket, diff --git a/src/Server/HTTP/HTTPServerConnection.cpp b/src/Server/HTTP/HTTPServerConnection.cpp index 92a994b3a4e..926d37a11ee 100644 --- a/src/Server/HTTP/HTTPServerConnection.cpp +++ b/src/Server/HTTP/HTTPServerConnection.cpp @@ -7,12 +7,12 @@ namespace DB { HTTPServerConnection::HTTPServerConnection( - ContextPtr context_, + HTTPContextPtr context_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket, Poco::Net::HTTPServerParams::Ptr params_, HTTPRequestHandlerFactoryPtr factory_) - : TCPServerConnection(socket), context(Context::createCopy(context_)), tcp_server(tcp_server_), params(params_), factory(factory_), stopped(false) + : TCPServerConnection(socket), context(std::move(context_)), tcp_server(tcp_server_), params(params_), factory(factory_), stopped(false) { poco_check_ptr(factory); } @@ -36,7 +36,7 @@ void HTTPServerConnection::run() if (request.isSecure()) { - size_t hsts_max_age = context->getSettingsRef().hsts_max_age.value; + size_t hsts_max_age = context->getMaxHstsAge(); if (hsts_max_age > 0) response.add("Strict-Transport-Security", "max-age=" + std::to_string(hsts_max_age)); diff --git a/src/Server/HTTP/HTTPServerConnection.h b/src/Server/HTTP/HTTPServerConnection.h index db3969f6ffb..cce4f44f203 100644 --- a/src/Server/HTTP/HTTPServerConnection.h +++ b/src/Server/HTTP/HTTPServerConnection.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include #include @@ -15,7 +15,7 @@ class HTTPServerConnection : public Poco::Net::TCPServerConnection { public: HTTPServerConnection( - ContextPtr context, + HTTPContextPtr context, TCPServer & tcp_server, const Poco::Net::StreamSocket & socket, Poco::Net::HTTPServerParams::Ptr params, @@ -27,7 +27,7 @@ protected: static void sendErrorResponse(Poco::Net::HTTPServerSession & session, Poco::Net::HTTPResponse::HTTPStatus status); private: - ContextPtr context; + HTTPContextPtr context; TCPServer & tcp_server; Poco::Net::HTTPServerParams::Ptr params; HTTPRequestHandlerFactoryPtr factory; diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.cpp b/src/Server/HTTP/HTTPServerConnectionFactory.cpp index 008da222c79..7e4edbbf542 100644 --- a/src/Server/HTTP/HTTPServerConnectionFactory.cpp +++ b/src/Server/HTTP/HTTPServerConnectionFactory.cpp @@ -5,8 +5,8 @@ namespace DB { HTTPServerConnectionFactory::HTTPServerConnectionFactory( - ContextPtr context_, Poco::Net::HTTPServerParams::Ptr params_, HTTPRequestHandlerFactoryPtr factory_) - : context(Context::createCopy(context_)), params(params_), factory(factory_) + HTTPContextPtr context_, Poco::Net::HTTPServerParams::Ptr params_, HTTPRequestHandlerFactoryPtr factory_) + : context(std::move(context_)), params(params_), factory(factory_) { poco_check_ptr(factory); } diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.h b/src/Server/HTTP/HTTPServerConnectionFactory.h index a19dc6d4d5c..03648ce7be7 100644 --- a/src/Server/HTTP/HTTPServerConnectionFactory.h +++ b/src/Server/HTTP/HTTPServerConnectionFactory.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include #include @@ -12,12 +12,12 @@ namespace DB class 
HTTPServerConnectionFactory : public TCPServerConnectionFactory { public: - HTTPServerConnectionFactory(ContextPtr context, Poco::Net::HTTPServerParams::Ptr params, HTTPRequestHandlerFactoryPtr factory); + HTTPServerConnectionFactory(HTTPContextPtr context, Poco::Net::HTTPServerParams::Ptr params, HTTPRequestHandlerFactoryPtr factory); Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override; private: - ContextPtr context; + HTTPContextPtr context; Poco::Net::HTTPServerParams::Ptr params; HTTPRequestHandlerFactoryPtr factory; }; diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp index 3b8df07b772..a82eb95aee1 100644 --- a/src/Server/HTTP/HTTPServerRequest.cpp +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -1,6 +1,5 @@ #include -#include #include #include #include @@ -21,11 +20,11 @@ namespace DB { -HTTPServerRequest::HTTPServerRequest(ContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session) - : max_uri_size(context->getSettingsRef().http_max_uri_size) - , max_fields_number(context->getSettingsRef().http_max_fields) - , max_field_name_size(context->getSettingsRef().http_max_field_name_size) - , max_field_value_size(context->getSettingsRef().http_max_field_value_size) +HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session) + : max_uri_size(context->getMaxUriSize()) + , max_fields_number(context->getMaxFields()) + , max_field_name_size(context->getMaxFieldNameSize()) + , max_field_value_size(context->getMaxFieldValueSize()) { response.attachRequest(this); @@ -34,8 +33,8 @@ HTTPServerRequest::HTTPServerRequest(ContextPtr context, HTTPServerResponse & re server_address = session.serverAddress(); secure = session.socket().secure(); - auto receive_timeout = context->getSettingsRef().http_receive_timeout; - auto send_timeout = context->getSettingsRef().http_send_timeout; + auto receive_timeout = context->getReceiveTimeout(); + auto send_timeout = context->getSendTimeout(); session.socket().setReceiveTimeout(receive_timeout); session.socket().setSendTimeout(send_timeout); @@ -46,7 +45,7 @@ HTTPServerRequest::HTTPServerRequest(ContextPtr context, HTTPServerResponse & re readRequest(*in); /// Try parse according to RFC7230 if (getChunkedTransferEncoding()) - stream = std::make_unique(std::move(in), context->getSettingsRef().http_max_chunk_size); + stream = std::make_unique(std::move(in), context->getMaxChunkSize()); else if (hasContentLength()) stream = std::make_unique(std::move(in), getContentLength(), false); else if (getMethod() != HTTPRequest::HTTP_GET && getMethod() != HTTPRequest::HTTP_HEAD && getMethod() != HTTPRequest::HTTP_DELETE) diff --git a/src/Server/HTTP/HTTPServerRequest.h b/src/Server/HTTP/HTTPServerRequest.h index 7ddbd296280..1f38334c745 100644 --- a/src/Server/HTTP/HTTPServerRequest.h +++ b/src/Server/HTTP/HTTPServerRequest.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "config.h" #include @@ -18,7 +19,7 @@ class ReadBufferFromPocoSocket; class HTTPServerRequest : public HTTPRequest { public: - HTTPServerRequest(ContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session); + HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse & response, Poco::Net::HTTPServerSession & session); /// FIXME: it's a little bit inconvenient interface. 
The rationale is that all other ReadBuffer's wrap each other /// via unique_ptr - but we can't inherit HTTPServerRequest from ReadBuffer and pass it around, diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index ac8f8332a9e..e4da7941b50 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include @@ -33,35 +32,6 @@ static void addDefaultHandlersFactory( const Poco::Util::AbstractConfiguration & config, AsynchronousMetrics & async_metrics); -HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string & name_) - : log(&Poco::Logger::get(name_)), name(name_) -{ -} - -std::unique_ptr HTTPRequestHandlerFactoryMain::createRequestHandler(const HTTPServerRequest & request) -{ - LOG_TRACE(log, "HTTP Request for {}. Method: {}, Address: {}, User-Agent: {}{}, Content Type: {}, Transfer Encoding: {}, X-Forwarded-For: {}", - name, request.getMethod(), request.clientAddress().toString(), request.get("User-Agent", "(none)"), - (request.hasContentLength() ? (", Length: " + std::to_string(request.getContentLength())) : ("")), - request.getContentType(), request.getTransferEncoding(), request.get("X-Forwarded-For", "(none)")); - - for (auto & handler_factory : child_factories) - { - auto handler = handler_factory->createRequestHandler(request); - if (handler) - return handler; - } - - if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET - || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD - || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) - { - return std::unique_ptr(new NotFoundHandler); - } - - return nullptr; -} - static inline auto createHandlersFactoryFromConfig( IServer & server, const Poco::Util::AbstractConfiguration & config, @@ -144,15 +114,7 @@ HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, const Poco:: else if (name == "InterserverIOHTTPHandler-factory" || name == "InterserverIOHTTPSHandler-factory") return createInterserverHTTPHandlerFactory(server, name); else if (name == "PrometheusHandler-factory") - { - auto factory = std::make_shared(name); - auto handler = std::make_shared>( - server, PrometheusMetricsWriter(config, "prometheus", async_metrics)); - handler->attachStrictPath(config.getString("prometheus.endpoint", "/metrics")); - handler->allowGetAndHeadRequest(); - factory->addHandler(handler); - return factory; - } + return createPrometheusMainHandlerFactory(server, config, async_metrics, name); throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index 9f306e787b0..f56c712c615 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -1,9 +1,10 @@ #pragma once -#include +#include #include #include #include +#include #include #include @@ -19,23 +20,6 @@ namespace ErrorCodes class IServer; -/// Handle request using child handlers -class HTTPRequestHandlerFactoryMain : public HTTPRequestHandlerFactory -{ -public: - explicit HTTPRequestHandlerFactoryMain(const std::string & name_); - - void addHandler(HTTPRequestHandlerFactoryPtr child_factory) { child_factories.emplace_back(child_factory); } - - std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; - -private: - Poco::Logger * log; - std::string name; - - std::vector child_factories; -}; - template class HandlingRuleHTTPHandlerFactory : public HTTPRequestHandlerFactory { @@ 
-148,6 +132,12 @@ createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix); +HTTPRequestHandlerFactoryPtr +createPrometheusMainHandlerFactory(IServer & server, + const Poco::Util::AbstractConfiguration & config, + AsynchronousMetrics & async_metrics, + const std::string & name); + /// @param server - used in handlers to check IServer::isCancelled() /// @param config - not the same as server.config(), since it can be newer /// @param async_metrics - used for prometheus (in case of prometheus.asynchronous_metrics=true) diff --git a/src/Server/HTTPRequestHandlerFactoryMain.cpp b/src/Server/HTTPRequestHandlerFactoryMain.cpp new file mode 100644 index 00000000000..61a2909d30f --- /dev/null +++ b/src/Server/HTTPRequestHandlerFactoryMain.cpp @@ -0,0 +1,38 @@ +#include +#include + +#include + +namespace DB +{ + +HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string & name_) + : log(&Poco::Logger::get(name_)), name(name_) +{ +} + +std::unique_ptr HTTPRequestHandlerFactoryMain::createRequestHandler(const HTTPServerRequest & request) +{ + LOG_TRACE(log, "HTTP Request for {}. Method: {}, Address: {}, User-Agent: {}{}, Content Type: {}, Transfer Encoding: {}, X-Forwarded-For: {}", + name, request.getMethod(), request.clientAddress().toString(), request.get("User-Agent", "(none)"), + (request.hasContentLength() ? (", Length: " + std::to_string(request.getContentLength())) : ("")), + request.getContentType(), request.getTransferEncoding(), request.get("X-Forwarded-For", "(none)")); + + for (auto & handler_factory : child_factories) + { + auto handler = handler_factory->createRequestHandler(request); + if (handler) + return handler; + } + + if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET + || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD + || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) + { + return std::unique_ptr(new NotFoundHandler); + } + + return nullptr; +} + +} diff --git a/src/Server/HTTPRequestHandlerFactoryMain.h b/src/Server/HTTPRequestHandlerFactoryMain.h new file mode 100644 index 00000000000..b0e57bd6b3b --- /dev/null +++ b/src/Server/HTTPRequestHandlerFactoryMain.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/// Handle request using child handlers +class HTTPRequestHandlerFactoryMain : public HTTPRequestHandlerFactory +{ +public: + explicit HTTPRequestHandlerFactoryMain(const std::string & name_); + + void addHandler(HTTPRequestHandlerFactoryPtr child_factory) { child_factories.emplace_back(child_factory); } + + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; + +private: + Poco::Logger * log; + std::string name; + + std::vector child_factories; +}; + +} diff --git a/src/Server/PrometheusMetricsWriter.h b/src/Server/PrometheusMetricsWriter.h index 0c2dde1f66f..b4f6ab57def 100644 --- a/src/Server/PrometheusMetricsWriter.h +++ b/src/Server/PrometheusMetricsWriter.h @@ -2,7 +2,7 @@ #include -#include +#include #include #include diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 896efcca674..79025624206 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -53,4 +53,19 @@ createPrometheusHandlerFactory(IServer & server, return factory; } +HTTPRequestHandlerFactoryPtr +createPrometheusMainHandlerFactory(IServer & server, + const Poco::Util::AbstractConfiguration & config, + AsynchronousMetrics & async_metrics, + 
const std::string & name) +{ + auto factory = std::make_shared(name); + auto handler = std::make_shared>( + server, PrometheusMetricsWriter(config, "prometheus", async_metrics)); + handler->attachStrictPath(config.getString("prometheus.endpoint", "/metrics")); + handler->allowGetAndHeadRequest(); + factory->addHandler(handler); + return factory; +} + } diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index f1300dfd940..39e91e19014 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -572,7 +572,6 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri std::map StorageDistributedDirectoryMonitor::getFiles() { std::map files; - size_t new_bytes_count = 0; fs::directory_iterator end; for (fs::directory_iterator it{path}; it != end; ++it) @@ -581,23 +580,9 @@ std::map StorageDistributedDirectoryMonitor::getFiles() if (!it->is_directory() && startsWith(fs::path(file_path_str).extension(), ".bin")) { files[parse(fs::path(file_path_str).stem())] = file_path_str; - new_bytes_count += fs::file_size(fs::path(file_path_str)); } } - { - std::lock_guard status_lock(status_mutex); - - if (status.files_count != files.size()) - LOG_TRACE(log, "Files set to {} (was {})", files.size(), status.files_count); - if (status.bytes_count != new_bytes_count) - LOG_TRACE(log, "Bytes set to {} (was {})", new_bytes_count, status.bytes_count); - - metric_pending_files.changeTo(files.size()); - status.files_count = files.size(); - status.bytes_count = new_bytes_count; - } - return files; } bool StorageDistributedDirectoryMonitor::processFiles(const std::map & files) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 722843a7ab6..0f4563b6f35 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -547,23 +547,23 @@ size_t StorageFileLog::getPollTimeoutMillisecond() const bool StorageFileLog::checkDependencies(const StorageID & table_id) { // Check if all dependencies are attached - auto dependencies = DatabaseCatalog::instance().getDependencies(table_id); - if (dependencies.empty()) + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + if (view_ids.empty()) return true; - for (const auto & storage : dependencies) + for (const auto & view_id : view_ids) { - auto table = DatabaseCatalog::instance().tryGetTable(storage, getContext()); - if (!table) + auto view = DatabaseCatalog::instance().tryGetTable(view_id, getContext()); + if (!view) return false; // If it materialized view, check it's target table - auto * materialized_view = dynamic_cast(table.get()); + auto * materialized_view = dynamic_cast(view.get()); if (materialized_view && !materialized_view->tryGetTargetTable()) return false; // Check all its dependencies - if (!checkDependencies(storage)) + if (!checkDependencies(view_id)) return false; } @@ -574,7 +574,7 @@ size_t StorageFileLog::getTableDependentCount() const { auto table_id = getStorageID(); // Check if at least one direct dependency is attached - return DatabaseCatalog::instance().getDependencies(table_id).size(); + return DatabaseCatalog::instance().getDependentViews(table_id).size(); } void StorageFileLog::threadFunc() diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index bc2d38de215..76100624d51 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -232,16 +232,16 @@ Names 
IStorage::getAllRegisteredNames() const NameDependencies IStorage::getDependentViewsByColumn(ContextPtr context) const { NameDependencies name_deps; - auto dependencies = DatabaseCatalog::instance().getDependencies(storage_id); - for (const auto & depend_id : dependencies) + auto view_ids = DatabaseCatalog::instance().getDependentViews(storage_id); + for (const auto & view_id : view_ids) { - auto depend_table = DatabaseCatalog::instance().getTable(depend_id, context); - if (depend_table->getInMemoryMetadataPtr()->select.inner_query) + auto view = DatabaseCatalog::instance().getTable(view_id, context); + if (view->getInMemoryMetadataPtr()->select.inner_query) { - const auto & select_query = depend_table->getInMemoryMetadataPtr()->select.inner_query; + const auto & select_query = view->getInMemoryMetadataPtr()->select.inner_query; auto required_columns = InterpreterSelectQuery(select_query, context, SelectQueryOptions{}.noModify()).getRequiredColumns(); for (const auto & col_name : required_columns) - name_deps[col_name].push_back(depend_id.table_name); + name_deps[col_name].push_back(view_id.table_name); } } return name_deps; diff --git a/src/Storages/KVStorageUtils.cpp b/src/Storages/KVStorageUtils.cpp index 41aa91eef31..7ec1340e339 100644 --- a/src/Storages/KVStorageUtils.cpp +++ b/src/Storages/KVStorageUtils.cpp @@ -140,7 +140,7 @@ std::vector serializeKeysToRawString( { std::string & serialized_key = result.emplace_back(); WriteBufferFromString wb(serialized_key); - key_column_type->getDefaultSerialization()->serializeBinary(*it, wb); + key_column_type->getDefaultSerialization()->serializeBinary(*it, wb, {}); wb.finalize(); ++it; @@ -165,7 +165,7 @@ std::vector serializeKeysToRawString(const ColumnWithTypeAndName & Field field; keys.column->get(i, field); /// TODO(@vdimir): use serializeBinaryBulk - keys.type->getDefaultSerialization()->serializeBinary(field, wb); + keys.type->getDefaultSerialization()->serializeBinary(field, wb, {}); wb.finalize(); } return result; diff --git a/src/Storages/KVStorageUtils.h b/src/Storages/KVStorageUtils.h index e3216164869..0574539f4c7 100644 --- a/src/Storages/KVStorageUtils.h +++ b/src/Storages/KVStorageUtils.h @@ -30,7 +30,7 @@ void fillColumns(const K & key, const V & value, size_t key_pos, const Block & h for (size_t i = 0; i < header.columns(); ++i) { const auto & serialization = header.getByPosition(i).type->getDefaultSerialization(); - serialization->deserializeBinary(*columns[i], i == key_pos ? key_buffer : value_buffer); + serialization->deserializeBinary(*columns[i], i == key_pos ? key_buffer : value_buffer, {}); } } diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 8e4dd78379e..77afa7ba623 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -584,24 +584,24 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf) bool StorageKafka::checkDependencies(const StorageID & table_id) { // Check if all dependencies are attached - auto dependencies = DatabaseCatalog::instance().getDependencies(table_id); - if (dependencies.empty()) + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + if (view_ids.empty()) return true; // Check the dependencies are ready? 
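/// Illustrative sketch, not part of this patch: the recurring change in StorageFileLog,
/// StorageKafka and StorageNATS swaps DatabaseCatalog::getDependencies() for
/// getDependentViews() and walks the dependent views recursively. A minimal version of
/// that pattern looks roughly like this; StorageMaterializedView and the exact catalog
/// signatures are assumptions taken from the surrounding context, not part of the diff.
static bool allDependentViewsAttached(const StorageID & table_id, ContextPtr context)
{
    for (const auto & view_id : DatabaseCatalog::instance().getDependentViews(table_id))
    {
        auto view = DatabaseCatalog::instance().tryGetTable(view_id, context);
        if (!view)
            return false;                          /// the view itself is not attached yet

        /// A materialized view is only usable once its target table is attached too.
        if (auto * mv = dynamic_cast<StorageMaterializedView *>(view.get());
            mv && !mv->tryGetTargetTable())
            return false;

        /// Views can feed further views, so check transitively.
        if (!allDependentViewsAttached(view_id, context))
            return false;
    }
    return true;
}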
- for (const auto & db_tab : dependencies) + for (const auto & view_id : view_ids) { - auto table = DatabaseCatalog::instance().tryGetTable(db_tab, getContext()); - if (!table) + auto view = DatabaseCatalog::instance().tryGetTable(view_id, getContext()); + if (!view) return false; // If it materialized view, check it's target table - auto * materialized_view = dynamic_cast(table.get()); + auto * materialized_view = dynamic_cast(view.get()); if (materialized_view && !materialized_view->tryGetTargetTable()) return false; // Check all its dependencies - if (!checkDependencies(db_tab)) + if (!checkDependencies(view_id)) return false; } @@ -616,8 +616,8 @@ void StorageKafka::threadFunc(size_t idx) { auto table_id = getStorageID(); // Check if at least one direct dependency is attached - size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size(); - if (dependencies_count) + size_t num_views = DatabaseCatalog::instance().getDependentViews(table_id).size(); + if (num_views) { auto start_time = std::chrono::steady_clock::now(); @@ -629,7 +629,7 @@ void StorageKafka::threadFunc(size_t idx) if (!checkDependencies(table_id)) break; - LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); + LOG_DEBUG(log, "Started streaming to {} attached views", num_views); // Exit the loop & reschedule if some stream stalled auto some_stream_is_stalled = streamToViews(); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 3d27205d638..c92968e4bcc 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -304,7 +304,7 @@ StorageLiveView::StorageLiveView( auto inner_query_tmp = inner_query->clone(); select_table_id = extractDependentTable(inner_query_tmp, getContext(), table_id_.table_name, inner_subquery); - DatabaseCatalog::instance().addDependency(select_table_id, table_id_); + DatabaseCatalog::instance().addViewDependency(select_table_id, table_id_); if (query.live_view_periodic_refresh) { @@ -434,11 +434,11 @@ bool StorageLiveView::getNewBlocks() void StorageLiveView::checkTableCanBeDropped() const { auto table_id = getStorageID(); - Dependencies dependencies = DatabaseCatalog::instance().getDependencies(table_id); - if (!dependencies.empty()) + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + if (!view_ids.empty()) { - StorageID dependent_table_id = dependencies.front(); - throw Exception("Table has dependency " + dependent_table_id.getNameForLogs(), ErrorCodes::TABLE_WAS_NOT_DROPPED); + StorageID view_id = *view_ids.begin(); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table has dependency {}", view_id); } } @@ -455,7 +455,7 @@ void StorageLiveView::shutdown() if (is_periodically_refreshed) periodic_refresh_task->deactivate(); - DatabaseCatalog::instance().removeDependency(select_table_id, getStorageID()); + DatabaseCatalog::instance().removeViewDependency(select_table_id, getStorageID()); } StorageLiveView::~StorageLiveView() @@ -466,7 +466,7 @@ StorageLiveView::~StorageLiveView() void StorageLiveView::drop() { auto table_id = getStorageID(); - DatabaseCatalog::instance().removeDependency(select_table_id, table_id); + DatabaseCatalog::instance().removeViewDependency(select_table_id, table_id); std::lock_guard lock(mutex); is_dropped = true; diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index e68a04fd9bc..347ea16950e 100644 --- 
a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -101,6 +101,7 @@ public: bool isValid() const override { return it->isValid(); } bool isFile() const override { return isValid() && disk->isFile(it->path()); } std::string name() const override { return it->name(); } + std::string path() const override { return it->path(); } private: DiskPtr disk; @@ -259,9 +260,17 @@ void DataPartStorageOnDisk::remove( std::string proj_dir_name = projection.name + proj_suffix; projection_directories.emplace(proj_dir_name); + NameSet files_not_to_remove_for_projection; + for (const auto & file_name : can_remove_description->files_not_to_remove) + { + if (file_name.starts_with(proj_dir_name)) + files_not_to_remove_for_projection.emplace(fs::path(file_name).filename()); + } + LOG_DEBUG(log, "Will not remove files [{}] for projection {}", fmt::join(files_not_to_remove_for_projection, ", "), projection.name); + clearDirectory( fs::path(to) / proj_dir_name, - can_remove_description->can_remove_anything, can_remove_description->files_not_to_remove, projection.checksums, {}, is_temp, state, log, true); + can_remove_description->can_remove_anything, files_not_to_remove_for_projection, projection.checksums, {}, is_temp, state, log, true); } /// It is possible that we are removing the part which have a written but not loaded projection. diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index c6669908db4..53ee2738fc6 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -39,6 +39,9 @@ public: /// Name of the file that the iterator currently points to. virtual std::string name() const = 0; + /// Path of the file that the iterator currently points to. 
+ virtual std::string path() const = 0; + virtual ~IDataPartStorageIterator() = default; }; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 630fbda833e..02a7a2ae641 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -86,9 +86,9 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par auto serialization = minmax_column_types[i]->getDefaultSerialization(); Field min_val; - serialization->deserializeBinary(min_val, *file); + serialization->deserializeBinary(min_val, *file, {}); Field max_val; - serialization->deserializeBinary(max_val, *file); + serialization->deserializeBinary(max_val, *file, {}); // NULL_LAST if (min_val.isNull()) @@ -134,8 +134,8 @@ IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::s auto out = part_storage.writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {}); HashingWriteBuffer out_hashing(*out); - serialization->serializeBinary(hyperrectangle[i].left, out_hashing); - serialization->serializeBinary(hyperrectangle[i].right, out_hashing); + serialization->serializeBinary(hyperrectangle[i].left, out_hashing, {}); + serialization->serializeBinary(hyperrectangle[i].right, out_hashing, {}); out_hashing.next(); out_checksums.files[file_name].file_size = out_hashing.count(); out_checksums.files[file_name].file_hash = out_hashing.getHash(); @@ -755,7 +755,7 @@ void IMergeTreeDataPart::loadIndex() for (size_t i = 0; i < marks_count; ++i) //-V756 for (size_t j = 0; j < key_size; ++j) - key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file); + key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file, {}); for (size_t i = 0; i < key_size; ++i) { @@ -1643,6 +1643,12 @@ void IMergeTreeDataPart::remove() return CanRemoveDescription{.can_remove_anything = true, .files_not_to_remove = {} }; } + if (getState() == MergeTreeDataPartState::Temporary) + { + LOG_TRACE(storage.log, "Part {} in temporary state can be removed without unlocking shared state", name); + return CanRemoveDescription{.can_remove_anything = false, .files_not_to_remove = {} }; + } + auto [can_remove, files_not_to_remove] = canRemovePart(); if (!can_remove) LOG_TRACE(storage.log, "Blobs of part {} cannot be removed", name); @@ -2041,14 +2047,6 @@ bool isCompressedFromIndexExtension(const String & index_extension) return index_extension == getIndexExtension(true); } -Strings getPartsNamesWithStates(const MergeTreeDataPartsVector & parts) -{ - Strings part_names; - for (const auto & p : parts) - part_names.push_back(p->getNameWithState()); - return part_names; -} - Strings getPartsNames(const MergeTreeDataPartsVector & parts) { Strings part_names; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 7801e5a60ba..e6c6f02b098 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -597,7 +597,6 @@ bool isCompressedFromIndexExtension(const String & index_extension); using MergeTreeDataPartsVector = std::vector; -Strings getPartsNamesWithStates(const MergeTreeDataPartsVector & parts); Strings getPartsNames(const MergeTreeDataPartsVector & parts); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6e14fec22af..d2b1da9abf8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4691,7 +4691,7 @@ void 
MergeTreeData::filterVisibleDataParts(DataPartsVector & maybe_visible_parts [[maybe_unused]] size_t visible_size = maybe_visible_parts.size(); LOG_TEST(log, "Got {} parts (of {}) visible in snapshot {} (TID {}): {}", - visible_size, total_size, snapshot_version, current_tid, fmt::join(getPartsNamesWithStates(maybe_visible_parts), ", ")); + visible_size, total_size, snapshot_version, current_tid, fmt::join(getPartsNames(maybe_visible_parts), ", ")); } @@ -6437,7 +6437,25 @@ std::pair MergeTreeData::cloneAn if (!files_to_copy_instead_of_hardlinks.contains(it->name()) && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) + { hardlinked_files->hardlinks_from_source_part.insert(it->name()); + } + } + + auto projections = src_part->getProjectionParts(); + for (const auto & [name, projection_part] : projections) + { + const auto & projection_storage = projection_part->getDataPartStorage(); + for (auto it = projection_storage.iterate(); it->isValid(); it->next()) + { + auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name(); + if (!files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix) + && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME + && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) + { + hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix); + } + } } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index d085bb29b20..a887b0ee322 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -244,7 +244,7 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc const auto & primary_column = primary_index_block.getByPosition(j); index_columns[j]->insertFrom(*primary_column.column, granule.start_row); primary_column.type->getDefaultSerialization()->serializeBinary( - *primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream); + *primary_column.column, granule.start_row, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); } } } @@ -312,7 +312,7 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat size_t last_row_number = column.size() - 1; index_columns[j]->insertFrom(column, last_row_number); index_types[j]->getDefaultSerialization()->serializeBinary( - column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream); + column, last_row_number, compress_primary_key ? 
*index_source_hashing_stream : *index_file_hashing_stream, {}); } last_block_index_columns.clear(); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8f824ca0777..677a5c1056e 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -385,9 +385,13 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( : static_cast(settings.max_threads); InputOrderInfoPtr group_by_info = query_info.projection->input_order_info; + SortDescription sort_description_for_merging; SortDescription group_by_sort_description; if (group_by_info && settings.optimize_aggregation_in_order) + { group_by_sort_description = getSortDescriptionFromGroupBy(select_query); + sort_description_for_merging = group_by_info->sort_description_for_merging; + } else group_by_info = nullptr; @@ -406,7 +410,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( temporary_data_merge_threads, /* storage_has_evenly_distributed_read_= */ false, /* group_by_use_nulls */ false, - std::move(group_by_info), + std::move(sort_description_for_merging), std::move(group_by_sort_description), should_produce_results_in_order_of_bucket_number, settings.enable_memory_bound_merging_of_aggregation_results); diff --git a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp index deed9b3f071..64fa7264738 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -91,12 +91,17 @@ void MergeTreeIndexGranuleBloomFilter::deserializeBinary(ReadBuffer & istr, Merg throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version); readVarUInt(total_rows, istr); + + static size_t atom_size = 8; + size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + size_t read_size = bytes_size; for (auto & filter : bloom_filters) { - static size_t atom_size = 8; - size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; filter = std::make_shared(bytes_size, hash_functions, 0); - istr.readStrict(reinterpret_cast(filter->getFilter().data()), bytes_size); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + read_size = filter->getFilter().size() * sizeof(BloomFilter::UnderType); +#endif + istr.readStrict(reinterpret_cast(filter->getFilter().data()), read_size); } } @@ -105,11 +110,17 @@ void MergeTreeIndexGranuleBloomFilter::serializeBinary(WriteBuffer & ostr) const if (empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty bloom filter index."); - static size_t atom_size = 8; writeVarUInt(total_rows, ostr); - size_t bytes_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; + + static size_t atom_size = 8; + size_t write_size = (bits_per_row * total_rows + atom_size - 1) / atom_size; for (const auto & bloom_filter : bloom_filters) - ostr.write(reinterpret_cast(bloom_filter->getFilter().data()), bytes_size); + { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + write_size = bloom_filter->getFilter().size() * sizeof(BloomFilter::UnderType); +#endif + ostr.write(reinterpret_cast(bloom_filter->getFilter().data()), write_size); + } } void MergeTreeIndexGranuleBloomFilter::fillingBloomFilter(BloomFilterPtr & bf, const Block & granule_index_block, size_t index_hash_column) const diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index 
088029d9e8e..d8765ddb9bc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -26,7 +26,7 @@ MergeTreeIndexGranuleHypothesis::MergeTreeIndexGranuleHypothesis(const String & void MergeTreeIndexGranuleHypothesis::serializeBinary(WriteBuffer & ostr) const { const auto & size_type = DataTypePtr(std::make_shared()); - size_type->getDefaultSerialization()->serializeBinary(static_cast(met), ostr); + size_type->getDefaultSerialization()->serializeBinary(static_cast(met), ostr, {}); } void MergeTreeIndexGranuleHypothesis::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) @@ -36,7 +36,7 @@ void MergeTreeIndexGranuleHypothesis::deserializeBinary(ReadBuffer & istr, Merge Field field_met; const auto & size_type = DataTypePtr(std::make_shared()); - size_type->getDefaultSerialization()->deserializeBinary(field_met, istr); + size_type->getDefaultSerialization()->deserializeBinary(field_met, istr, {}); met = field_met.get(); is_empty = false; } diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 43e655a4ee5..fc19f819cf1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -42,8 +42,8 @@ void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const const DataTypePtr & type = index_sample_block.getByPosition(i).type; auto serialization = type->getDefaultSerialization(); - serialization->serializeBinary(hyperrectangle[i].left, ostr); - serialization->serializeBinary(hyperrectangle[i].right, ostr); + serialization->serializeBinary(hyperrectangle[i].left, ostr, {}); + serialization->serializeBinary(hyperrectangle[i].right, ostr, {}); } } @@ -63,8 +63,8 @@ void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr, MergeTree case 1: if (!type->isNullable()) { - serialization->deserializeBinary(min_val, istr); - serialization->deserializeBinary(max_val, istr); + serialization->deserializeBinary(min_val, istr, {}); + serialization->deserializeBinary(max_val, istr, {}); } else { @@ -78,8 +78,8 @@ void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr, MergeTree readBinary(is_null, istr); if (!is_null) { - serialization->deserializeBinary(min_val, istr); - serialization->deserializeBinary(max_val, istr); + serialization->deserializeBinary(min_val, istr, {}); + serialization->deserializeBinary(max_val, istr, {}); } else { @@ -91,8 +91,8 @@ void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr, MergeTree /// New format with proper Nullable support for values that includes Null values case 2: - serialization->deserializeBinary(min_val, istr); - serialization->deserializeBinary(max_val, istr); + serialization->deserializeBinary(min_val, istr, {}); + serialization->deserializeBinary(max_val, istr, {}); // NULL_LAST if (min_val.isNull()) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 0e15f2c4cb6..a28394e943e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -56,11 +56,11 @@ void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const if (max_rows != 0 && size() > max_rows) { - size_serialization->serializeBinary(0, ostr); + size_serialization->serializeBinary(0, ostr, {}); return; } - size_serialization->serializeBinary(size(), ostr); + size_serialization->serializeBinary(size(), ostr, {}); for (size_t i = 0; i < 
index_sample_block.columns(); ++i) { @@ -90,7 +90,7 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd Field field_rows; const auto & size_type = DataTypePtr(std::make_shared()); - size_type->getDefaultSerialization()->deserializeBinary(field_rows, istr); + size_type->getDefaultSerialization()->deserializeBinary(field_rows, istr, {}); size_t rows_to_read = field_rows.get(); if (rows_to_read == 0) diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 10f5cc95baf..e7fdf1617f0 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -379,7 +379,7 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM auto file = manager->read("partition.dat"); value.resize(partition_key_sample.columns()); for (size_t i = 0; i < partition_key_sample.columns(); ++i) - partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file); + partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file, {}); } std::unique_ptr MergeTreePartition::store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const @@ -399,7 +399,7 @@ std::unique_ptr MergeTreePartition::store(const Block & HashingWriteBuffer out_hashing(*out); for (size_t i = 0; i < value.size(); ++i) { - partition_key_sample.getByPosition(i).type->getDefaultSerialization()->serializeBinary(value[i], out_hashing); + partition_key_sample.getByPosition(i).type->getDefaultSerialization()->serializeBinary(value[i], out_hashing, {}); } out_hashing.next(); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index e5ba771a198..f6befe67fd4 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1322,9 +1322,11 @@ private: for (auto p_it = projection_data_part_storage_src->iterate(); p_it->isValid(); p_it->next()) { + auto file_name_with_projection_prefix = fs::path(projection_data_part_storage_src->getPartDirectory()) / p_it->name(); projection_data_part_storage_dst->createHardLinkFrom( *projection_data_part_storage_src, p_it->name(), p_it->name()); - hardlinked_files.insert(p_it->name()); + + hardlinked_files.insert(file_name_with_projection_prefix); } } } diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index dea2553700b..5a8e250a972 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -535,24 +535,24 @@ bool StorageNATS::isSubjectInSubscriptions(const std::string & subject) bool StorageNATS::checkDependencies(const StorageID & table_id) { // Check if all dependencies are attached - auto dependencies = DatabaseCatalog::instance().getDependencies(table_id); - if (dependencies.empty()) + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + if (view_ids.empty()) return true; // Check the dependencies are ready? 
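/// Illustrative sketch, not part of this patch: StorageKafka, StorageNATS and
/// StorageFileLog drive their background streaming the same way after this change -
/// count the dependent views via getDependentViews() and stream only while every one
/// of them stays attached. The helper names below are simplified/hypothetical; the
/// real loops also handle rescheduling, connection state and stall detection.
bool streamOneBatchToViews();                       /// hypothetical: push one batch to all views
bool checkDependenciesSketch(const StorageID &);    /// recursive check, as sketched earlier

void streamingToViewsSketch(const StorageID & table_id, Poco::Logger * log)
{
    size_t num_views = DatabaseCatalog::instance().getDependentViews(table_id).size();
    if (num_views == 0)
        return;                                     /// nobody to stream to - go back to sleep

    LOG_DEBUG(log, "Started streaming to {} attached views", num_views);

    while (checkDependenciesSketch(table_id))       /// stop as soon as a view gets detached
        if (streamOneBatchToViews())
            break;                                  /// some stream stalled - reschedule later
}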
- for (const auto & db_tab : dependencies) + for (const auto & view_id : view_ids) { - auto table = DatabaseCatalog::instance().tryGetTable(db_tab, getContext()); - if (!table) + auto view = DatabaseCatalog::instance().tryGetTable(view_id, getContext()); + if (!view) return false; // If it materialized view, check it's target table - auto * materialized_view = dynamic_cast(table.get()); + auto * materialized_view = dynamic_cast(view.get()); if (materialized_view && !materialized_view->tryGetTargetTable()) return false; // Check all its dependencies - if (!checkDependencies(db_tab)) + if (!checkDependencies(view_id)) return false; } @@ -568,10 +568,10 @@ void StorageNATS::streamingToViewsFunc() auto table_id = getStorageID(); // Check if at least one direct dependency is attached - size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size(); + size_t num_views = DatabaseCatalog::instance().getDependentViews(table_id).size(); bool nats_connected = connection->isConnected() || connection->reconnect(); - if (dependencies_count && nats_connected) + if (num_views && nats_connected) { auto start_time = std::chrono::steady_clock::now(); @@ -583,7 +583,7 @@ void StorageNATS::streamingToViewsFunc() if (!checkDependencies(table_id)) break; - LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); + LOG_DEBUG(log, "Started streaming to {} attached views", num_views); if (streamToViews()) { diff --git a/src/Storages/NamedCollectionConfiguration.cpp b/src/Storages/NamedCollectionConfiguration.cpp new file mode 100644 index 00000000000..b0e7bdce32a --- /dev/null +++ b/src/Storages/NamedCollectionConfiguration.cpp @@ -0,0 +1,174 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; +} + +namespace NamedCollectionConfiguration +{ + +template T getConfigValue( + const Poco::Util::AbstractConfiguration & config, + const std::string & path) +{ + return getConfigValueOrDefault(config, path); +} + +template T getConfigValueOrDefault( + const Poco::Util::AbstractConfiguration & config, + const std::string & path, + const T * default_value) +{ + if (!config.has(path)) + { + if (!default_value) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", path); + return *default_value; + } + + if constexpr (std::is_same_v) + return config.getString(path); + else if constexpr (std::is_same_v) + return config.getUInt64(path); + else if constexpr (std::is_same_v) + return config.getInt64(path); + else if constexpr (std::is_same_v) + return config.getDouble(path); + else + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Unsupported type in getConfigValueOrDefault(). " + "Supported types are String, UInt64, Int64, Float64"); +} + +template void setConfigValue( + Poco::Util::AbstractConfiguration & config, + const std::string & path, + const T & value, + bool update) +{ + if (!update && config.has(path)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Key `{}` already exists", path); + + if constexpr (std::is_same_v) + config.setString(path, value); + else if constexpr (std::is_same_v) + config.setUInt64(path, value); + else if constexpr (std::is_same_v) + config.setInt64(path, value); + else if constexpr (std::is_same_v) + config.setDouble(path, value); + else + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Unsupported type in setConfigValue(). 
" + "Supported types are String, UInt64, Int64, Float64"); +} + +template void copyConfigValue( + const Poco::Util::AbstractConfiguration & from_config, + const std::string & from_path, + Poco::Util::AbstractConfiguration & to_config, + const std::string & to_path) +{ + if (!from_config.has(from_path)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", from_path); + + if (to_config.has(to_path)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Key `{}` already exists", to_path); + + if constexpr (std::is_same_v) + to_config.setString(to_path, from_config.getString(from_path)); + else if constexpr (std::is_same_v) + to_config.setUInt64(to_path, from_config.getUInt64(from_path)); + else if constexpr (std::is_same_v) + to_config.setInt64(to_path, from_config.getInt64(from_path)); + else if constexpr (std::is_same_v) + to_config.setDouble(to_path, from_config.getDouble(from_path)); + else + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Unsupported type in copyConfigValue(). " + "Supported types are String, UInt64, Int64, Float64"); +} + +void removeConfigValue( + Poco::Util::AbstractConfiguration & config, + const std::string & path) +{ + if (!config.has(path)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", path); + config.remove(path); +} + +ConfigurationPtr createEmptyConfiguration(const std::string & root_name) +{ + using DocumentPtr = Poco::AutoPtr; + using ElementPtr = Poco::AutoPtr; + + DocumentPtr xml_document(new Poco::XML::Document()); + ElementPtr root_element(xml_document->createElement(root_name)); + xml_document->appendChild(root_element); + + ConfigurationPtr config(new Poco::Util::XMLConfiguration(xml_document)); + return config; +} + +ConfigurationPtr createConfiguration(const std::string & root_name, const SettingsChanges & settings) +{ + namespace Configuration = NamedCollectionConfiguration; + + auto config = Configuration::createEmptyConfiguration(root_name); + for (const auto & [name, value] : settings) + Configuration::setConfigValue(*config, name, convertFieldToString(value)); + + return config; +} + +template String getConfigValue(const Poco::Util::AbstractConfiguration & config, + const std::string & path); +template UInt64 getConfigValue(const Poco::Util::AbstractConfiguration & config, + const std::string & path); +template Int64 getConfigValue(const Poco::Util::AbstractConfiguration & config, + const std::string & path); +template Float64 getConfigValue(const Poco::Util::AbstractConfiguration & config, + const std::string & path); + +template String getConfigValueOrDefault(const Poco::Util::AbstractConfiguration & config, + const std::string & path, const String * default_value); +template UInt64 getConfigValueOrDefault(const Poco::Util::AbstractConfiguration & config, + const std::string & path, const UInt64 * default_value); +template Int64 getConfigValueOrDefault(const Poco::Util::AbstractConfiguration & config, + const std::string & path, const Int64 * default_value); +template Float64 getConfigValueOrDefault(const Poco::Util::AbstractConfiguration & config, + const std::string & path, const Float64 * default_value); + +template void setConfigValue(Poco::Util::AbstractConfiguration & config, + const std::string & path, const String & value, bool update); +template void setConfigValue(Poco::Util::AbstractConfiguration & config, + const std::string & path, const UInt64 & value, bool update); +template void setConfigValue(Poco::Util::AbstractConfiguration & config, + const std::string & path, const Int64 & value, bool update); 
+template void setConfigValue(Poco::Util::AbstractConfiguration & config, + const std::string & path, const Float64 & value, bool update); + +template void copyConfigValue(const Poco::Util::AbstractConfiguration & from_config, const std::string & from_path, + Poco::Util::AbstractConfiguration & to_config, const std::string & to_path); +template void copyConfigValue(const Poco::Util::AbstractConfiguration & from_config, const std::string & from_path, + Poco::Util::AbstractConfiguration & to_config, const std::string & to_path); +template void copyConfigValue(const Poco::Util::AbstractConfiguration & from_config, const std::string & from_path, + Poco::Util::AbstractConfiguration & to_config, const std::string & to_path); +template void copyConfigValue(const Poco::Util::AbstractConfiguration & from_config, const std::string & from_path, + Poco::Util::AbstractConfiguration & to_config, const std::string & to_path); +} + +} diff --git a/src/Storages/NamedCollectionConfiguration.h b/src/Storages/NamedCollectionConfiguration.h new file mode 100644 index 00000000000..7478dcf2d9a --- /dev/null +++ b/src/Storages/NamedCollectionConfiguration.h @@ -0,0 +1,44 @@ +#pragma once +#include + +namespace DB +{ + +using ConfigurationPtr = Poco::AutoPtr; +class SettingsChanges; + +namespace NamedCollectionConfiguration +{ + +ConfigurationPtr createEmptyConfiguration(const std::string & root_name); + +template T getConfigValue( + const Poco::Util::AbstractConfiguration & config, + const std::string & path); + +template T getConfigValueOrDefault( + const Poco::Util::AbstractConfiguration & config, + const std::string & path, + const T * default_value = nullptr); + +template void setConfigValue( + Poco::Util::AbstractConfiguration & config, + const std::string & path, + const T & value, + bool update = false); + +template void copyConfigValue( + const Poco::Util::AbstractConfiguration & from_config, + const std::string & from_path, + Poco::Util::AbstractConfiguration & to_config, + const std::string & to_path); + +void removeConfigValue( + Poco::Util::AbstractConfiguration & config, + const std::string & path); + +ConfigurationPtr createConfiguration(const std::string & root_name, const SettingsChanges & settings); + +} + +} diff --git a/src/Storages/NamedCollectionUtils.cpp b/src/Storages/NamedCollectionUtils.cpp new file mode 100644 index 00000000000..75d5aace664 --- /dev/null +++ b/src/Storages/NamedCollectionUtils.cpp @@ -0,0 +1,434 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace fs = std::filesystem; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NAMED_COLLECTION_ALREADY_EXISTS; + extern const int NAMED_COLLECTION_DOESNT_EXIST; + extern const int BAD_ARGUMENTS; +} + +namespace NamedCollectionUtils +{ + +class LoadFromConfig +{ +private: + const Poco::Util::AbstractConfiguration & config; + +public: + explicit LoadFromConfig(const Poco::Util::AbstractConfiguration & config_) + : config(config_) {} + + std::vector listCollections() const + { + Poco::Util::AbstractConfiguration::Keys collections_names; + config.keys(NAMED_COLLECTIONS_CONFIG_PREFIX, collections_names); + return collections_names; + } + + NamedCollectionsMap getAll() const + { + NamedCollectionsMap result; + for (const auto & collection_name : listCollections()) + { + if (result.contains(collection_name)) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "Found 
duplicate named collection `{}`", + collection_name); + } + result.emplace(collection_name, get(collection_name)); + } + return result; + } + + MutableNamedCollectionPtr get(const std::string & collection_name) const + { + const auto collection_prefix = getCollectionPrefix(collection_name); + std::queue enumerate_input; + std::set enumerate_result; + + enumerate_input.push(collection_prefix); + collectKeys(config, std::move(enumerate_input), enumerate_result); + + /// Collection does not have any keys. + /// (`enumerate_result` == ). + const bool collection_is_empty = enumerate_result.size() == 1 + && *enumerate_result.begin() == collection_prefix; + std::set keys; + if (!collection_is_empty) + { + /// Skip collection prefix and add +1 to avoid '.' in the beginning. + for (const auto & path : enumerate_result) + keys.emplace(path.substr(collection_prefix.size() + 1)); + } + + return NamedCollection::create( + config, collection_name, collection_prefix, keys, SourceId::CONFIG, /* is_mutable */false); + } + +private: + static constexpr auto NAMED_COLLECTIONS_CONFIG_PREFIX = "named_collections"; + + static std::string getCollectionPrefix(const std::string & collection_name) + { + return fmt::format("{}.{}", NAMED_COLLECTIONS_CONFIG_PREFIX, collection_name); + } + + /// Enumerate keys paths of the config recursively. + /// E.g. if `enumerate_paths` = {"root.key1"} and config like + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// the `result` will contain two strings: "root.key1.key2" and "root.key1.key3.key4" + static void collectKeys( + const Poco::Util::AbstractConfiguration & config, + std::queue enumerate_paths, + std::set & result) + { + if (enumerate_paths.empty()) + return; + + auto initial_paths = std::move(enumerate_paths); + enumerate_paths = {}; + while (!initial_paths.empty()) + { + auto path = initial_paths.front(); + initial_paths.pop(); + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(path, keys); + + if (keys.empty()) + { + result.insert(path); + } + else + { + for (const auto & key : keys) + enumerate_paths.emplace(path + '.' 
+ key); + } + } + + collectKeys(config, enumerate_paths, result); + } +}; + + +class LoadFromSQL : private WithContext +{ +private: + const std::string metadata_path; + +public: + explicit LoadFromSQL(ContextPtr context_) + : WithContext(context_) + , metadata_path( + fs::canonical(context_->getPath()) / NAMED_COLLECTIONS_METADATA_DIRECTORY) + { + if (fs::exists(metadata_path)) + cleanUp(); + else + fs::create_directories(metadata_path); + } + + std::vector listCollections() const + { + std::vector collection_names; + fs::directory_iterator it{metadata_path}; + for (; it != fs::directory_iterator{}; ++it) + { + const auto & current_path = it->path(); + if (current_path.extension() == ".sql") + { + collection_names.push_back(it->path().stem()); + } + else + { + LOG_WARNING( + &Poco::Logger::get("NamedCollectionsLoadFromSQL"), + "Unexpected file {} in named collections directory", + current_path.filename().string()); + } + } + return collection_names; + } + + NamedCollectionsMap getAll() const + { + NamedCollectionsMap result; + for (const auto & collection_name : listCollections()) + { + if (result.contains(collection_name)) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "Found duplicate named collection `{}`", + collection_name); + } + result.emplace(collection_name, get(collection_name)); + } + return result; + } + + MutableNamedCollectionPtr get(const std::string & collection_name) const + { + const auto query = readCreateQueryFromMetadata( + getMetadataPath(collection_name), + getContext()->getSettingsRef()); + return createNamedCollectionFromAST(query); + } + + MutableNamedCollectionPtr create(const ASTCreateNamedCollectionQuery & query) + { + writeCreateQueryToMetadata( + query, + getMetadataPath(query.collection_name), + getContext()->getSettingsRef()); + + return createNamedCollectionFromAST(query); + } + + void update(const ASTAlterNamedCollectionQuery & query) + { + const auto path = getMetadataPath(query.collection_name); + auto create_query = readCreateQueryFromMetadata(path, getContext()->getSettings()); + + std::unordered_map result_changes_map; + for (const auto & [name, value] : query.changes) + { + auto [it, inserted] = result_changes_map.emplace(name, value); + if (!inserted) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Value with key `{}` is used twice in the SET query", + name, query.collection_name); + } + } + + for (const auto & [name, value] : create_query.changes) + result_changes_map.emplace(name, value); + + for (const auto & delete_key : query.delete_keys) + { + auto it = result_changes_map.find(delete_key); + if (it == result_changes_map.end()) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot delete key `{}` because it does not exist in collection", + delete_key); + } + else + result_changes_map.erase(it); + } + + create_query.changes.clear(); + for (const auto & [name, value] : result_changes_map) + create_query.changes.emplace_back(name, value); + + writeCreateQueryToMetadata( + create_query, + getMetadataPath(query.collection_name), + getContext()->getSettingsRef(), + true); + } + + void remove(const std::string & collection_name) + { + if (!removeIfExists(collection_name)) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, + "Cannot remove collection `{}`, because it doesn't exist", + collection_name); + } + } + + bool removeIfExists(const std::string & collection_name) + { + auto collection_path = getMetadataPath(collection_name); + if (fs::exists(collection_path)) + { + 
fs::remove(collection_path); + return true; + } + return false; + } + +private: + static constexpr auto NAMED_COLLECTIONS_METADATA_DIRECTORY = "named_collections"; + + static MutableNamedCollectionPtr createNamedCollectionFromAST( + const ASTCreateNamedCollectionQuery & query) + { + const auto & collection_name = query.collection_name; + const auto config = NamedCollectionConfiguration::createConfiguration( + collection_name, query.changes); + + std::set keys; + for (const auto & [name, _] : query.changes) + keys.insert(name); + + return NamedCollection::create( + *config, collection_name, "", keys, SourceId::SQL, /* is_mutable */true); + } + + std::string getMetadataPath(const std::string & collection_name) const + { + return fs::path(metadata_path) / (escapeForFileName(collection_name) + ".sql"); + } + + /// Delete .tmp files. They could be left undeleted in case of + /// some exception or abrupt server restart. + void cleanUp() + { + fs::directory_iterator it{metadata_path}; + std::vector files_to_remove; + for (; it != fs::directory_iterator{}; ++it) + { + const auto & current_path = it->path(); + if (current_path.extension() == ".tmp") + files_to_remove.push_back(current_path); + } + for (const auto & file : files_to_remove) + fs::remove(file); + } + + static ASTCreateNamedCollectionQuery readCreateQueryFromMetadata( + const std::string & path, + const Settings & settings) + { + ReadBufferFromFile in(path); + std::string query; + readStringUntilEOF(query, in); + + ParserCreateNamedCollectionQuery parser; + auto ast = parseQuery(parser, query, "in file " + path, 0, settings.max_parser_depth); + const auto & create_query = ast->as(); + return create_query; + } + + static void writeCreateQueryToMetadata( + const ASTCreateNamedCollectionQuery & query, + const std::string & path, + const Settings & settings, + bool replace = false) + { + if (!replace && fs::exists(path)) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "Metadata file {} for named collection already exists", + path); + } + + auto tmp_path = path + ".tmp"; + String formatted_query = serializeAST(query); + WriteBufferFromFile out(tmp_path, formatted_query.size(), O_WRONLY | O_CREAT | O_EXCL); + writeString(formatted_query, out); + + out.next(); + if (settings.fsync_metadata) + out.sync(); + out.close(); + + fs::rename(tmp_path, path); + } +}; + +std::unique_lock lockNamedCollectionsTransaction() +{ + static std::mutex transaction_lock; + return std::unique_lock(transaction_lock); +} + +void loadFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = lockNamedCollectionsTransaction(); + NamedCollectionFactory::instance().add(LoadFromConfig(config).getAll()); +} + +void reloadFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = lockNamedCollectionsTransaction(); + auto collections = LoadFromConfig(config).getAll(); + auto & instance = NamedCollectionFactory::instance(); + instance.removeById(SourceId::CONFIG); + instance.add(collections); +} + +void loadFromSQL(ContextPtr context) +{ + auto lock = lockNamedCollectionsTransaction(); + NamedCollectionFactory::instance().add(LoadFromSQL(context).getAll()); +} + +void removeFromSQL(const std::string & collection_name, ContextPtr context) +{ + auto lock = lockNamedCollectionsTransaction(); + LoadFromSQL(context).remove(collection_name); + NamedCollectionFactory::instance().remove(collection_name); +} + +void removeIfExistsFromSQL(const std::string & collection_name, ContextPtr context) +{ + auto lock = 
lockNamedCollectionsTransaction(); + LoadFromSQL(context).removeIfExists(collection_name); + NamedCollectionFactory::instance().removeIfExists(collection_name); +} + +void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context) +{ + auto lock = lockNamedCollectionsTransaction(); + NamedCollectionFactory::instance().add(query.collection_name, LoadFromSQL(context).create(query)); +} + +void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context) +{ + auto lock = lockNamedCollectionsTransaction(); + LoadFromSQL(context).update(query); + + auto collection = NamedCollectionFactory::instance().getMutable(query.collection_name); + auto collection_lock = collection->lock(); + + for (const auto & [name, value] : query.changes) + collection->setOrUpdate(name, convertFieldToString(value)); + + for (const auto & key : query.delete_keys) + collection->remove(key); +} + +} + +} diff --git a/src/Storages/NamedCollectionUtils.h b/src/Storages/NamedCollectionUtils.h new file mode 100644 index 00000000000..8befc9cac3c --- /dev/null +++ b/src/Storages/NamedCollectionUtils.h @@ -0,0 +1,40 @@ +#pragma once +#include + +namespace Poco { namespace Util { class AbstractConfiguration; } } + +namespace DB +{ + +class ASTCreateNamedCollectionQuery; +class ASTAlterNamedCollectionQuery; + +namespace NamedCollectionUtils +{ + +enum class SourceId +{ + NONE = 0, + CONFIG = 1, + SQL = 2, +}; + +void loadFromConfig(const Poco::Util::AbstractConfiguration & config); +void reloadFromConfig(const Poco::Util::AbstractConfiguration & config); + +/// Load named collections from `context->getPath() / named_collections /`. +void loadFromSQL(ContextPtr context); + +/// Remove collection as well as its metadata from `context->getPath() / named_collections /`. +void removeFromSQL(const std::string & collection_name, ContextPtr context); +void removeIfExistsFromSQL(const std::string & collection_name, ContextPtr context); + +/// Create a new collection from AST and put it to `context->getPath() / named_collections /`. +void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context); + +/// Update definition of already existing collection from AST and update result in `context->getPath() / named_collections /`. +void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context); + +} + +} diff --git a/src/Storages/NamedCollections.cpp b/src/Storages/NamedCollections.cpp index 67847635f3f..d90225547ac 100644 --- a/src/Storages/NamedCollections.cpp +++ b/src/Storages/NamedCollections.cpp @@ -1,17 +1,11 @@ #include "NamedCollections.h" -#include -#include -#include #include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include #include @@ -20,66 +14,13 @@ namespace DB namespace ErrorCodes { - extern const int UNKNOWN_NAMED_COLLECTION; + extern const int NAMED_COLLECTION_DOESNT_EXIST; extern const int NAMED_COLLECTION_ALREADY_EXISTS; - extern const int BAD_ARGUMENTS; - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; + extern const int NAMED_COLLECTION_IS_IMMUTABLE; } -namespace -{ - constexpr auto NAMED_COLLECTIONS_CONFIG_PREFIX = "named_collections"; +namespace Configuration = NamedCollectionConfiguration; - std::string getCollectionPrefix(const std::string & collection_name) - { - return fmt::format("{}.{}", NAMED_COLLECTIONS_CONFIG_PREFIX, collection_name); - } - - /// Enumerate keys paths of the config recursively. - /// E.g. 
if `enumerate_paths` = {"root.key1"} and config like - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// the `result` will contain two strings: "root.key1.key2" and "root.key1.key3.key4" - void collectKeys( - const Poco::Util::AbstractConfiguration & config, - std::queue enumerate_paths, - std::set & result) - { - if (enumerate_paths.empty()) - return; - - auto initial_paths = std::move(enumerate_paths); - enumerate_paths = {}; - while (!initial_paths.empty()) - { - auto path = initial_paths.front(); - initial_paths.pop(); - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(path, keys); - - if (keys.empty()) - { - result.insert(path); - } - else - { - for (const auto & key : keys) - enumerate_paths.emplace(path + '.' + key); - } - } - - collectKeys(config, enumerate_paths, result); - } -} NamedCollectionFactory & NamedCollectionFactory::instance() { @@ -87,38 +28,6 @@ NamedCollectionFactory & NamedCollectionFactory::instance() return instance; } -void NamedCollectionFactory::initialize(const Poco::Util::AbstractConfiguration & config_) -{ - std::lock_guard lock(mutex); - if (is_initialized) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Named collection factory already initialized"); - } - - config = &config_; - is_initialized = true; -} - -void NamedCollectionFactory::reload(const Poco::Util::AbstractConfiguration & config_) -{ - std::lock_guard lock(mutex); - config = &config_; - loaded_named_collections.clear(); -} - -void NamedCollectionFactory::assertInitialized( - std::lock_guard & /* lock */) const -{ - if (!is_initialized) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Named collection factory must be initialized before being used"); - } -} - bool NamedCollectionFactory::exists(const std::string & collection_name) const { std::lock_guard lock(mutex); @@ -127,62 +36,84 @@ bool NamedCollectionFactory::exists(const std::string & collection_name) const bool NamedCollectionFactory::existsUnlocked( const std::string & collection_name, - std::lock_guard & lock) const + std::lock_guard & /* lock */) const { - assertInitialized(lock); - /// Named collections can be added via SQL command or via config. - /// Named collections from config are loaded on first access, - /// therefore it might not be in `named_collections` map yet. 
- return loaded_named_collections.contains(collection_name) - || config->has(getCollectionPrefix(collection_name)); + return loaded_named_collections.contains(collection_name); } NamedCollectionPtr NamedCollectionFactory::get(const std::string & collection_name) const { std::lock_guard lock(mutex); - assertInitialized(lock); - - if (!existsUnlocked(collection_name, lock)) + auto collection = tryGetUnlocked(collection_name, lock); + if (!collection) { throw Exception( - ErrorCodes::UNKNOWN_NAMED_COLLECTION, + ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, "There is no named collection `{}`", collection_name); } - - return getImpl(collection_name, lock); + return collection; } NamedCollectionPtr NamedCollectionFactory::tryGet(const std::string & collection_name) const { std::lock_guard lock(mutex); - assertInitialized(lock); - - if (!existsUnlocked(collection_name, lock)) - return nullptr; - - return getImpl(collection_name, lock); + return tryGetUnlocked(collection_name, lock); } -NamedCollectionPtr NamedCollectionFactory::getImpl( +MutableNamedCollectionPtr NamedCollectionFactory::getMutable( + const std::string & collection_name) const +{ + std::lock_guard lock(mutex); + auto collection = tryGetUnlocked(collection_name, lock); + if (!collection) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, + "There is no named collection `{}`", + collection_name); + } + else if (!collection->isMutable()) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_IS_IMMUTABLE, + "Cannot get collection `{}` for modification, " + "because collection was defined as immutable", + collection_name); + } + return collection; +} + +MutableNamedCollectionPtr NamedCollectionFactory::tryGetUnlocked( const std::string & collection_name, std::lock_guard & /* lock */) const { auto it = loaded_named_collections.find(collection_name); if (it == loaded_named_collections.end()) - { - it = loaded_named_collections.emplace( - collection_name, - NamedCollection::create(*config, collection_name)).first; - } + return nullptr; return it->second; } void NamedCollectionFactory::add( const std::string & collection_name, - NamedCollectionPtr collection) + MutableNamedCollectionPtr collection) { std::lock_guard lock(mutex); + return addUnlocked(collection_name, collection, lock); +} + +void NamedCollectionFactory::add(NamedCollectionsMap collections) +{ + std::lock_guard lock(mutex); + for (const auto & [collection_name, collection] : collections) + addUnlocked(collection_name, collection, lock); +} + +void NamedCollectionFactory::addUnlocked( + const std::string & collection_name, + MutableNamedCollectionPtr collection, + std::lock_guard & /* lock */) +{ auto [it, inserted] = loaded_named_collections.emplace(collection_name, collection); if (!inserted) { @@ -196,93 +127,104 @@ void NamedCollectionFactory::add( void NamedCollectionFactory::remove(const std::string & collection_name) { std::lock_guard lock(mutex); - assertInitialized(lock); - - if (!existsUnlocked(collection_name, lock)) + bool removed = removeIfExistsUnlocked(collection_name, lock); + if (!removed) { throw Exception( - ErrorCodes::UNKNOWN_NAMED_COLLECTION, + ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST, "There is no named collection `{}`", collection_name); } - - if (config->has(collection_name)) - { - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "Collection {} is defined in config and cannot be removed", - collection_name); - } - - [[maybe_unused]] auto removed = loaded_named_collections.erase(collection_name); - assert(removed); } 
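/// Illustrative sketch, not part of this patch: after this change the factory no longer
/// reads collections lazily from the server config; it only serves what was explicitly
/// registered via NamedCollectionUtils::loadFromConfig()/loadFromSQL(). A stripped-down
/// model of the mutex-guarded map lookup used above (the *Unlocked helpers are folded in,
/// names simplified):
#include <map>
#include <mutex>
#include <string>

class NamedCollectionFactorySketch
{
public:
    void add(const std::string & name, MutableNamedCollectionPtr collection)
    {
        std::lock_guard lock(mutex);
        if (!collections.emplace(name, std::move(collection)).second)
            throw Exception(ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                            "A named collection `{}` already exists", name);
    }

    NamedCollectionPtr get(const std::string & name) const
    {
        std::lock_guard lock(mutex);
        auto it = collections.find(name);
        if (it == collections.end())
            throw Exception(ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
                            "There is no named collection `{}`", name);
        return it->second;
    }

private:
    mutable std::mutex mutex;
    std::map<std::string, MutableNamedCollectionPtr> collections;
};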
-NamedCollectionFactory::NamedCollections NamedCollectionFactory::getAll() const +void NamedCollectionFactory::removeIfExists(const std::string & collection_name) { std::lock_guard lock(mutex); - assertInitialized(lock); + removeIfExistsUnlocked(collection_name, lock); +} - NamedCollections result(loaded_named_collections); +bool NamedCollectionFactory::removeIfExistsUnlocked( + const std::string & collection_name, + std::lock_guard & lock) +{ + auto collection = tryGetUnlocked(collection_name, lock); + if (!collection) + return false; - Poco::Util::AbstractConfiguration::Keys config_collections_names; - config->keys(NAMED_COLLECTIONS_CONFIG_PREFIX, config_collections_names); - - for (const auto & collection_name : config_collections_names) + if (!collection->isMutable()) { - if (result.contains(collection_name)) - continue; - - result.emplace(collection_name, NamedCollection::create(*config, collection_name)); + throw Exception( + ErrorCodes::NAMED_COLLECTION_IS_IMMUTABLE, + "Cannot get collection `{}` for modification, " + "because collection was defined as immutable", + collection_name); } + loaded_named_collections.erase(collection_name); + return true; +} - return result; +void NamedCollectionFactory::removeById(NamedCollectionUtils::SourceId id) +{ + std::lock_guard lock(mutex); + std::erase_if( + loaded_named_collections, + [&](const auto & value) { return value.second->getSourceId() == id; }); +} + +NamedCollectionsMap NamedCollectionFactory::getAll() const +{ + std::lock_guard lock(mutex); + return loaded_named_collections; } class NamedCollection::Impl { private: - using ConfigurationPtr = Poco::AutoPtr; - - /// Named collection configuration - /// - /// ... - /// ConfigurationPtr config; Keys keys; + Impl(ConfigurationPtr config_, const Keys & keys_) : config(config_) , keys(keys_) {} + public: - Impl(const Poco::Util::AbstractConfiguration & config_, - const std::string & collection_name_, - const Keys & keys_) - : config(createEmptyConfiguration(collection_name_)) - , keys(keys_) + static ImplPtr create( + const Poco::Util::AbstractConfiguration & config, + const std::string & collection_name, + const std::string & collection_path, + const Keys & keys) { - auto collection_path = getCollectionPrefix(collection_name_); + auto collection_config = NamedCollectionConfiguration::createEmptyConfiguration(collection_name); for (const auto & key : keys) - copyConfigValue(config_, collection_path + '.' + key, *config, key); + Configuration::copyConfigValue( + config, collection_path + '.' 
+ key, *collection_config, key); + + return std::unique_ptr(new Impl(collection_config, keys)); } template T get(const Key & key) const { - return getConfigValue(*config, key); + return Configuration::getConfigValue(*config, key); } template T getOrDefault(const Key & key, const T & default_value) const { - return getConfigValueOrDefault(*config, key, &default_value); + return Configuration::getConfigValueOrDefault(*config, key, &default_value); } template void set(const Key & key, const T & value, bool update_if_exists) { - setConfigValue(*config, key, value, update_if_exists); + Configuration::setConfigValue(*config, key, value, update_if_exists); if (!keys.contains(key)) keys.insert(key); } + ImplPtr createCopy(const std::string & collection_name_) const + { + return create(*config, collection_name_, "", keys); + } + void remove(const Key & key) { - removeConfigValue(*config, key); + Configuration::removeConfigValue(*config, key); [[maybe_unused]] auto removed = keys.erase(key); assert(removed); } @@ -292,11 +234,6 @@ public: return keys; } - ImplPtr copy() const - { - return std::make_unique(*this); - } - std::string dumpStructure() const { /// Convert a collection config like @@ -347,186 +284,108 @@ public: } return wb.str(); } - -private: - template static T getConfigValue( - const Poco::Util::AbstractConfiguration & config, - const std::string & path) - { - return getConfigValueOrDefault(config, path); - } - - template static T getConfigValueOrDefault( - const Poco::Util::AbstractConfiguration & config, - const std::string & path, - const T * default_value = nullptr) - { - if (!config.has(path)) - { - if (!default_value) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", path); - return *default_value; - } - - if constexpr (std::is_same_v) - return config.getString(path); - else if constexpr (std::is_same_v) - return config.getUInt64(path); - else if constexpr (std::is_same_v) - return config.getInt64(path); - else if constexpr (std::is_same_v) - return config.getDouble(path); - else - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "Unsupported type in getConfigValueOrDefault(). " - "Supported types are String, UInt64, Int64, Float64"); - } - - template static void setConfigValue( - Poco::Util::AbstractConfiguration & config, - const std::string & path, - const T & value, - bool update = false) - { - if (!update && config.has(path)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Key `{}` already exists", path); - - if constexpr (std::is_same_v) - config.setString(path, value); - else if constexpr (std::is_same_v) - config.setUInt64(path, value); - else if constexpr (std::is_same_v) - config.setInt64(path, value); - else if constexpr (std::is_same_v) - config.setDouble(path, value); - else - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "Unsupported type in setConfigValue(). 
" - "Supported types are String, UInt64, Int64, Float64"); - } - - template static void copyConfigValue( - const Poco::Util::AbstractConfiguration & from_config, - const std::string & from_path, - Poco::Util::AbstractConfiguration & to_config, - const std::string & to_path) - { - if (!from_config.has(from_path)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", from_path); - - if (to_config.has(to_path)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Key `{}` already exists", to_path); - - if constexpr (std::is_same_v) - to_config.setString(to_path, from_config.getString(from_path)); - else if constexpr (std::is_same_v) - to_config.setString(to_path, from_config.getString(from_path)); - else if constexpr (std::is_same_v) - to_config.setUInt64(to_path, from_config.getUInt64(from_path)); - else if constexpr (std::is_same_v) - to_config.setInt64(to_path, from_config.getInt64(from_path)); - else if constexpr (std::is_same_v) - to_config.setDouble(to_path, from_config.getDouble(from_path)); - else - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "Unsupported type in copyConfigValue(). " - "Supported types are String, UInt64, Int64, Float64"); - } - - static void removeConfigValue( - Poco::Util::AbstractConfiguration & config, - const std::string & path) - { - if (!config.has(path)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}`", path); - config.remove(path); - } - - static ConfigurationPtr createEmptyConfiguration(const std::string & root_name) - { - using DocumentPtr = Poco::AutoPtr; - DocumentPtr xml_document(new Poco::XML::Document()); - xml_document->appendChild(xml_document->createElement(root_name)); - ConfigurationPtr config(new Poco::Util::XMLConfiguration(xml_document)); - return config; - } }; NamedCollection::NamedCollection( - const Poco::Util::AbstractConfiguration & config, - const std::string & collection_path, - const Keys & keys) - : NamedCollection(std::make_unique(config, collection_path, keys)) -{ -} - -NamedCollection::NamedCollection(ImplPtr pimpl_) + ImplPtr pimpl_, + const std::string & collection_name_, + SourceId source_id_, + bool is_mutable_) : pimpl(std::move(pimpl_)) + , collection_name(collection_name_) + , source_id(source_id_) + , is_mutable(is_mutable_) { } -NamedCollectionPtr NamedCollection::create( +MutableNamedCollectionPtr NamedCollection::create( const Poco::Util::AbstractConfiguration & config, - const std::string & collection_name) + const std::string & collection_name, + const std::string & collection_path, + const Keys & keys, + SourceId source_id, + bool is_mutable) { - const auto collection_prefix = getCollectionPrefix(collection_name); - std::queue enumerate_input; - std::set enumerate_result; - - enumerate_input.push(collection_prefix); - collectKeys(config, std::move(enumerate_input), enumerate_result); - - /// Collection does not have any keys. - /// (`enumerate_result` == ). - const bool collection_is_empty = enumerate_result.size() == 1; - std::set keys; - if (!collection_is_empty) - { - /// Skip collection prefix and add +1 to avoid '.' in the beginning. 
- for (const auto & path : enumerate_result) - keys.emplace(path.substr(collection_prefix.size() + 1)); - } - return std::make_unique(config, collection_name, keys); + auto impl = Impl::create(config, collection_name, collection_path, keys); + return std::unique_ptr( + new NamedCollection(std::move(impl), collection_name, source_id, is_mutable)); } template T NamedCollection::get(const Key & key) const { + std::lock_guard lock(mutex); return pimpl->get(key); } template T NamedCollection::getOrDefault(const Key & key, const T & default_value) const { + std::lock_guard lock(mutex); return pimpl->getOrDefault(key, default_value); } -template void NamedCollection::set(const Key & key, const T & value, bool update_if_exists) +template void NamedCollection::set(const Key & key, const T & value) { - pimpl->set(key, value, update_if_exists); + assertMutable(); + std::unique_lock lock(mutex, std::defer_lock); + if constexpr (!Locked) + lock.lock(); + pimpl->set(key, value, false); } -void NamedCollection::remove(const Key & key) +template void NamedCollection::setOrUpdate(const Key & key, const T & value) { + assertMutable(); + std::unique_lock lock(mutex, std::defer_lock); + if constexpr (!Locked) + lock.lock(); + pimpl->set(key, value, true); +} + +template void NamedCollection::remove(const Key & key) +{ + assertMutable(); + std::unique_lock lock(mutex, std::defer_lock); + if constexpr (!Locked) + lock.lock(); pimpl->remove(key); } -std::shared_ptr NamedCollection::duplicate() const +void NamedCollection::assertMutable() const { - return std::make_shared(pimpl->copy()); + if (!is_mutable) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_IS_IMMUTABLE, + "Cannot change named collection because it is immutable"); + } +} + +MutableNamedCollectionPtr NamedCollection::duplicate() const +{ + std::lock_guard lock(mutex); + auto impl = pimpl->createCopy(collection_name); + return std::unique_ptr( + new NamedCollection( + std::move(impl), collection_name, NamedCollectionUtils::SourceId::NONE, true)); } NamedCollection::Keys NamedCollection::getKeys() const { + std::lock_guard lock(mutex); return pimpl->getKeys(); } std::string NamedCollection::dumpStructure() const { + std::lock_guard lock(mutex); return pimpl->dumpStructure(); } +std::unique_lock NamedCollection::lock() +{ + return std::unique_lock(mutex); +} + template String NamedCollection::get(const NamedCollection::Key & key) const; template UInt64 NamedCollection::get(const NamedCollection::Key & key) const; template Int64 NamedCollection::get(const NamedCollection::Key & key) const; @@ -537,9 +396,25 @@ template UInt64 NamedCollection::getOrDefault(const NamedCollection::Key template Int64 NamedCollection::getOrDefault(const NamedCollection::Key & key, const Int64 & default_value) const; template Float64 NamedCollection::getOrDefault(const NamedCollection::Key & key, const Float64 & default_value) const; -template void NamedCollection::set(const NamedCollection::Key & key, const String & value, bool update_if_exists); -template void NamedCollection::set(const NamedCollection::Key & key, const UInt64 & value, bool update_if_exists); -template void NamedCollection::set(const NamedCollection::Key & key, const Int64 & value, bool update_if_exists); -template void NamedCollection::set(const NamedCollection::Key & key, const Float64 & value, bool update_if_exists); +template void NamedCollection::set(const NamedCollection::Key & key, const String & value); +template void NamedCollection::set(const NamedCollection::Key & key, const String & value); 
+template void NamedCollection::set(const NamedCollection::Key & key, const UInt64 & value); +template void NamedCollection::set(const NamedCollection::Key & key, const UInt64 & value); +template void NamedCollection::set(const NamedCollection::Key & key, const Int64 & value); +template void NamedCollection::set(const NamedCollection::Key & key, const Int64 & value); +template void NamedCollection::set(const NamedCollection::Key & key, const Float64 & value); +template void NamedCollection::set(const NamedCollection::Key & key, const Float64 & value); + +template void NamedCollection::setOrUpdate(const NamedCollection::Key & key, const String & value); +template void NamedCollection::setOrUpdate(const NamedCollection::Key & key, const String & value); +template void NamedCollection::setOrUpdate(const NamedCollection::Key & key, const UInt64 & value); +template void NamedCollection::setOrUpdate(const NamedCollection::Key & key, const UInt64 & value); +template void NamedCollection::setOrUpdate(const NamedCollection::Key & key, const Int64 & value); +template void NamedCollection::setOrUpdate(const NamedCollection::Key & key, const Int64 & value); +template void NamedCollection::setOrUpdate(const NamedCollection::Key & key, const Float64 & value); +template void NamedCollection::setOrUpdate(const NamedCollection::Key & key, const Float64 & value); + +template void NamedCollection::remove(const Key & key); +template void NamedCollection::remove(const Key & key); } diff --git a/src/Storages/NamedCollections.h b/src/Storages/NamedCollections.h index 83bb1dd964e..f7181c2b539 100644 --- a/src/Storages/NamedCollections.h +++ b/src/Storages/NamedCollections.h @@ -1,15 +1,13 @@ #pragma once - #include -#include +#include +#include +namespace Poco { namespace Util { class AbstractConfiguration; } } namespace DB { -class NamedCollection; -using NamedCollectionPtr = std::shared_ptr; - /** * Class to represent arbitrary-structured named collection object. * It can be defined via config or via SQL command. 
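
Illustrative sketch (not part of this patch) of how the reworked read/copy API described above is meant to be consumed. The collection name "my_collection", its keys and the caller are hypothetical, and the behaviour noted in the comments is inferred from the error handling visible elsewhere in this diff:

    #include <Storages/NamedCollections.h>

    using namespace DB;

    /// Hypothetical caller: "my_collection", "url" and "port" are made-up names.
    void readAndCopyExample()
    {
        NamedCollectionPtr collection = NamedCollectionFactory::instance().get("my_collection");

        auto url = collection->get<String>("url");                  /// throws if the key is absent
        auto port = collection->getOrDefault<UInt64>("port", 9000); /// falls back to 9000

        /// duplicate() returns a detached copy that is always mutable, whereas the
        /// original may be immutable (config-defined) and would reject modification
        /// with NAMED_COLLECTION_IS_IMMUTABLE.
        MutableNamedCollectionPtr copy = collection->duplicate();
        copy->setOrUpdate<String>("url", url + "/shard_0");
        copy->setOrUpdate<UInt64>("port", port + 1);
    }
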
@@ -22,40 +20,58 @@ using NamedCollectionPtr = std::shared_ptr<NamedCollection>;
  */
 class NamedCollection
 {
-private:
-    class Impl;
-    using ImplPtr = std::unique_ptr<Impl>;
-
-    ImplPtr pimpl;
-
 public:
     using Key = std::string;
     using Keys = std::set<Key>;
+    using SourceId = NamedCollectionUtils::SourceId;
 
-    static NamedCollectionPtr create(
-        const Poco::Util::AbstractConfiguration & config,
-        const std::string & collection_name);
-
-    NamedCollection(
+    static MutableNamedCollectionPtr create(
         const Poco::Util::AbstractConfiguration & config,
+        const std::string & collection_name,
         const std::string & collection_path,
-        const Keys & keys);
-
-    explicit NamedCollection(ImplPtr pimpl_);
+        const Keys & keys,
+        SourceId source_id_,
+        bool is_mutable_);
 
     template <typename T> T get(const Key & key) const;
 
     template <typename T> T getOrDefault(const Key & key, const T & default_value) const;
 
-    template <typename T> void set(const Key & key, const T & value, bool update_if_exists = false);
+    std::unique_lock<std::mutex> lock();
 
-    void remove(const Key & key);
+    template <typename T, bool Locked = false> void set(const Key & key, const T & value);
 
-    std::shared_ptr<NamedCollection> duplicate() const;
+    template <typename T, bool Locked = false> void setOrUpdate(const Key & key, const T & value);
+
+    template <bool Locked = false> void remove(const Key & key);
+
+    MutableNamedCollectionPtr duplicate() const;
 
     Keys getKeys() const;
 
     std::string dumpStructure() const;
+
+    bool isMutable() const { return is_mutable; }
+
+    SourceId getSourceId() const { return source_id; }
+
+private:
+    class Impl;
+    using ImplPtr = std::unique_ptr<Impl>;
+
+    NamedCollection(
+        ImplPtr pimpl_,
+        const std::string & collection_name,
+        SourceId source_id,
+        bool is_mutable);
+
+    void assertMutable() const;
+
+    ImplPtr pimpl;
+    const std::string collection_name;
+    const SourceId source_id;
+    const bool is_mutable;
+    mutable std::mutex mutex;
 };
 
 /**
@@ -66,42 +82,51 @@ class NamedCollectionFactory : boost::noncopyable
 public:
     static NamedCollectionFactory & instance();
 
-    void initialize(const Poco::Util::AbstractConfiguration & config_);
-
-    void reload(const Poco::Util::AbstractConfiguration & config_);
-
     bool exists(const std::string & collection_name) const;
 
     NamedCollectionPtr get(const std::string & collection_name) const;
 
     NamedCollectionPtr tryGet(const std::string & collection_name) const;
 
-    void add(
-        const std::string & collection_name,
-        NamedCollectionPtr collection);
+    MutableNamedCollectionPtr getMutable(const std::string & collection_name) const;
+
+    void add(const std::string & collection_name, MutableNamedCollectionPtr collection);
+
+    void add(NamedCollectionsMap collections);
+
+    void update(NamedCollectionsMap collections);
 
     void remove(const std::string & collection_name);
 
-    using NamedCollections = std::unordered_map<std::string, NamedCollectionPtr>;
-    NamedCollections getAll() const;
+    void removeIfExists(const std::string & collection_name);
+
+    void removeById(NamedCollectionUtils::SourceId id);
+
+    NamedCollectionsMap getAll() const;
 
 private:
-    void assertInitialized(std::lock_guard<std::mutex> & lock) const;
-
-    NamedCollectionPtr getImpl(
-        const std::string & collection_name,
-        std::lock_guard<std::mutex> & lock) const;
-
     bool existsUnlocked(
        const std::string & collection_name,
        std::lock_guard<std::mutex> & lock) const;
 
-    mutable NamedCollections loaded_named_collections;
+    MutableNamedCollectionPtr tryGetUnlocked(
+        const std::string & collection_name,
+        std::lock_guard<std::mutex> & lock) const;
 
-    const Poco::Util::AbstractConfiguration * config;
+    void addUnlocked(
+        const std::string & collection_name,
+        MutableNamedCollectionPtr collection,
+        std::lock_guard<std::mutex> & lock);
+
+    bool removeIfExistsUnlocked(
+        const std::string & collection_name,
+        std::lock_guard<std::mutex> & lock); 
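
A companion sketch (again illustrative, not code from the patch) for the factory interface declared above. The names below are hypothetical, and the behaviour of getMutable() on immutable collections is inferred from the NAMED_COLLECTION_IS_IMMUTABLE checks in the accompanying .cpp changes:

    #include <Storages/NamedCollections.h>

    using namespace DB;

    /// Hypothetical helper: registers a collection, tweaks it, then drops it.
    void registerAndDropExample(MutableNamedCollectionPtr collection)
    {
        auto & factory = NamedCollectionFactory::instance();

        factory.add("my_collection", collection);

        /// getMutable() is the writable counterpart of get(); an immutable
        /// (config-defined) collection is expected to be rejected here.
        auto mutable_ref = factory.getMutable("my_collection");
        mutable_ref->setOrUpdate<UInt64>("retries", 3);

        /// removeIfExists() is a no-op for unknown names; removeById() instead drops
        /// everything that came from a given NamedCollectionUtils::SourceId.
        factory.removeIfExists("my_collection");
    }
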
+ + mutable NamedCollectionsMap loaded_named_collections; - bool is_initialized = false; mutable std::mutex mutex; + bool is_initialized = false; }; + } diff --git a/src/Storages/NamedCollections_fwd.h b/src/Storages/NamedCollections_fwd.h new file mode 100644 index 00000000000..47ebe81c91f --- /dev/null +++ b/src/Storages/NamedCollections_fwd.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ + +class NamedCollection; +using NamedCollectionPtr = std::shared_ptr; +using MutableNamedCollectionPtr = std::shared_ptr; +using NamedCollectionsMap = std::map; + +} diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 57f5ddd86e6..bce3fee71f7 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -938,24 +938,24 @@ ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() bool StorageRabbitMQ::checkDependencies(const StorageID & table_id) { // Check if all dependencies are attached - auto dependencies = DatabaseCatalog::instance().getDependencies(table_id); - if (dependencies.empty()) + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + if (view_ids.empty()) return true; // Check the dependencies are ready? - for (const auto & db_tab : dependencies) + for (const auto & view_id : view_ids) { - auto table = DatabaseCatalog::instance().tryGetTable(db_tab, getContext()); - if (!table) + auto view = DatabaseCatalog::instance().tryGetTable(view_id, getContext()); + if (!view) return false; // If it materialized view, check it's target table - auto * materialized_view = dynamic_cast(table.get()); + auto * materialized_view = dynamic_cast(view.get()); if (materialized_view && !materialized_view->tryGetTargetTable()) return false; // Check all its dependencies - if (!checkDependencies(db_tab)) + if (!checkDependencies(view_id)) return false; } @@ -984,10 +984,10 @@ void StorageRabbitMQ::streamingToViewsFunc() auto table_id = getStorageID(); // Check if at least one direct dependency is attached - size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size(); + size_t num_views = DatabaseCatalog::instance().getDependentViews(table_id).size(); bool rabbit_connected = connection->isConnected() || connection->reconnect(); - if (dependencies_count && rabbit_connected) + if (num_views && rabbit_connected) { initializeBuffers(); auto start_time = std::chrono::steady_clock::now(); @@ -1000,7 +1000,7 @@ void StorageRabbitMQ::streamingToViewsFunc() if (!checkDependencies(table_id)) break; - LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); + LOG_DEBUG(log, "Started streaming to {} attached views", num_views); if (streamToViews()) { diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index c39e70745fd..b1b158a2aa5 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -46,7 +46,7 @@ void EmbeddedRocksDBSink::consume(Chunk chunk) size_t idx = 0; for (const auto & elem : block) { - elem.type->getDefaultSerialization()->serializeBinary(*elem.column, i, idx == primary_key_pos ? wb_key : wb_value); + elem.type->getDefaultSerialization()->serializeBinary(*elem.column, i, idx == primary_key_pos ? 
wb_key : wb_value, {}); ++idx; } status = batch.Put(wb_key.str(), wb_value.str()); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 46ddb650eee..2fcedf550e8 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -259,7 +259,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt { wb_key.restart(); - column_it->type->getDefaultSerialization()->serializeBinary(*column, i, wb_key); + column_it->type->getDefaultSerialization()->serializeBinary(*column, i, wb_key, {}); auto status = batch.Delete(wb_key.str()); if (!status.ok()) throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR); diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 21be205c0f6..bd255a952dc 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -111,7 +111,7 @@ public: size_t idx = 0; for (const auto & elem : block) { - elem.type->getDefaultSerialization()->serializeBinary(*elem.column, i, idx == primary_key_pos ? wb_key : wb_value); + elem.type->getDefaultSerialization()->serializeBinary(*elem.column, i, idx == primary_key_pos ? wb_key : wb_value, {}); ++idx; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index e256e087728..ed01ca9cec4 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -210,7 +210,7 @@ void StorageMaterializedView::drop() auto table_id = getStorageID(); const auto & select_query = getInMemoryMetadataPtr()->getSelectQuery(); if (!select_query.select_table_id.empty()) - DatabaseCatalog::instance().removeDependency(select_query.select_table_id, table_id); + DatabaseCatalog::instance().removeViewDependency(select_query.select_table_id, table_id); dropInnerTableIfAny(true, getContext()); } @@ -266,7 +266,7 @@ void StorageMaterializedView::alter( const auto & new_select = new_metadata.select; const auto & old_select = old_metadata.getSelectQuery(); - DatabaseCatalog::instance().updateDependency(old_select.select_table_id, table_id, new_select.select_table_id, table_id); + DatabaseCatalog::instance().updateViewDependency(old_select.select_table_id, table_id, new_select.select_table_id, table_id); new_metadata.setSelectQuery(new_select); } @@ -364,7 +364,7 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) } const auto & select_query = metadata_snapshot->getSelectQuery(); // TODO Actually we don't need to update dependency if MV has UUID, but then db and table name will be outdated - DatabaseCatalog::instance().updateDependency(select_query.select_table_id, old_table_id, select_query.select_table_id, getStorageID()); + DatabaseCatalog::instance().updateViewDependency(select_query.select_table_id, old_table_id, select_query.select_table_id, getStorageID()); } void StorageMaterializedView::startup() @@ -372,7 +372,7 @@ void StorageMaterializedView::startup() auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & select_query = metadata_snapshot->getSelectQuery(); if (!select_query.select_table_id.empty()) - DatabaseCatalog::instance().addDependency(select_query.select_table_id, getStorageID()); + DatabaseCatalog::instance().addViewDependency(select_query.select_table_id, getStorageID()); } void StorageMaterializedView::shutdown() @@ -381,7 +381,7 @@ void StorageMaterializedView::shutdown() const auto & select_query = 
metadata_snapshot->getSelectQuery(); /// Make sure the dependency is removed after DETACH TABLE if (!select_query.select_table_id.empty()) - DatabaseCatalog::instance().removeDependency(select_query.select_table_id, getStorageID()); + DatabaseCatalog::instance().removeViewDependency(select_query.select_table_id, getStorageID()); } StoragePtr StorageMaterializedView::getTargetTable() const diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 030ccb5ace2..bd57579505a 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1541,7 +1541,7 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont LOG_TEST(log, "Made {} empty parts in order to cover {} parts. Empty parts: {}, covered parts: {}. With txn {}", future_parts.size(), parts.size(), - fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNamesWithStates(parts), ", "), + fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "), transaction.getTID()); captureTmpDirectoryHolders(*this, future_parts); diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index ec970654b6e..b10f3c65ebf 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #include @@ -103,8 +105,7 @@ Pipe StorageS3Cluster::read( auto callback = std::make_shared([iterator]() mutable -> String { return iterator->next(); }); /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) - Block header = - InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + auto interpreter = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()); const Scalars & scalars = context->hasQueryContext() ? 
context->getQueryContext()->getScalars() : Scalars{}; @@ -112,11 +113,21 @@ Pipe StorageS3Cluster::read( const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; - ASTPtr query_to_send = query_info.original_query->clone(); + ASTPtr query_to_send = interpreter.getQueryInfo().query->clone(); if (add_columns_structure_to_query) addColumnsStructureToQueryWithClusterEngine( query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 5, getName()); + RestoreQualifiedNamesVisitor::Data data; + data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query_info.query->as(), 0)); + data.remote_table.database = context->getCurrentDatabase(); + data.remote_table.table = getName(); + RestoreQualifiedNamesVisitor(data).visit(query_to_send); + AddDefaultDatabaseVisitor visitor(context, context->getCurrentDatabase(), + /* only_replace_current_database_function_= */false, + /* only_replace_in_join_= */true); + visitor.visit(query_to_send); + const auto & current_settings = context->getSettingsRef(); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); for (const auto & shard_info : cluster->getShardsInfo()) @@ -128,7 +139,7 @@ Pipe StorageS3Cluster::read( shard_info.pool, std::vector{try_result}, queryToString(query_to_send), - header, + interpreter.getSampleBlock(), context, /*throttler=*/nullptr, scalars, diff --git a/src/Storages/System/StorageSystemAsynchronousMetrics.cpp b/src/Storages/System/StorageSystemAsynchronousMetrics.cpp index e2f62b902b7..843c7cb85e1 100644 --- a/src/Storages/System/StorageSystemAsynchronousMetrics.cpp +++ b/src/Storages/System/StorageSystemAsynchronousMetrics.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 72301a56d49..e1611f1ecfd 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -348,26 +348,26 @@ protected: res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); { - Array dependencies_table_name_array; - Array dependencies_database_name_array; + Array views_table_name_array; + Array views_database_name_array; if (columns_mask[src_index] || columns_mask[src_index + 1]) { - const auto dependencies = DatabaseCatalog::instance().getDependencies(StorageID(database_name, table_name)); + const auto view_ids = DatabaseCatalog::instance().getDependentViews(StorageID(database_name, table_name)); - dependencies_table_name_array.reserve(dependencies.size()); - dependencies_database_name_array.reserve(dependencies.size()); - for (const auto & dependency : dependencies) + views_table_name_array.reserve(view_ids.size()); + views_database_name_array.reserve(view_ids.size()); + for (const auto & view_id : view_ids) { - dependencies_table_name_array.push_back(dependency.table_name); - dependencies_database_name_array.push_back(dependency.database_name); + views_table_name_array.push_back(view_id.table_name); + views_database_name_array.push_back(view_id.database_name); } } if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_database_name_array); + res_columns[res_index++]->insert(views_database_name_array); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_table_name_array); + res_columns[res_index++]->insert(views_table_name_array); } if (columns_mask[src_index] || 
columns_mask[src_index + 1] || columns_mask[src_index + 2]) @@ -513,37 +513,38 @@ protected: if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2] || columns_mask[src_index + 3]) { - DependenciesInfo info = DatabaseCatalog::instance().getLoadingDependenciesInfo({database_name, table_name}); + auto dependencies = DatabaseCatalog::instance().getDependencies(StorageID{database_name, table_name}); + auto dependents = DatabaseCatalog::instance().getDependents(StorageID{database_name, table_name}); - Array loading_dependencies_databases; - Array loading_dependencies_tables; - loading_dependencies_databases.reserve(info.dependencies.size()); - loading_dependencies_tables.reserve(info.dependencies.size()); - for (auto && dependency : info.dependencies) + Array dependencies_databases; + Array dependencies_tables; + dependencies_databases.reserve(dependencies.size()); + dependencies_tables.reserve(dependencies.size()); + for (const auto & dependency : dependencies) { - loading_dependencies_databases.push_back(dependency.database); - loading_dependencies_tables.push_back(dependency.table); + dependencies_databases.push_back(dependency.database_name); + dependencies_tables.push_back(dependency.table_name); } - Array loading_dependent_databases; - Array loading_dependent_tables; - loading_dependent_databases.reserve(info.dependencies.size()); - loading_dependent_tables.reserve(info.dependencies.size()); - for (auto && dependent : info.dependent_database_objects) + Array dependents_databases; + Array dependents_tables; + dependents_databases.reserve(dependents.size()); + dependents_tables.reserve(dependents.size()); + for (const auto & dependent : dependents) { - loading_dependent_databases.push_back(dependent.database); - loading_dependent_tables.push_back(dependent.table); + dependents_databases.push_back(dependent.database_name); + dependents_tables.push_back(dependent.table_name); } if (columns_mask[src_index++]) - res_columns[res_index++]->insert(loading_dependencies_databases); + res_columns[res_index++]->insert(dependencies_databases); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(loading_dependencies_tables); + res_columns[res_index++]->insert(dependencies_tables); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(loading_dependent_databases); + res_columns[res_index++]->insert(dependents_databases); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(loading_dependent_tables); + res_columns[res_index++]->insert(dependents_tables); } } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index c0bc5ad8da9..442a7822e33 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -471,7 +471,7 @@ void StorageWindowView::alter( create_interpreter.setInternal(true); create_interpreter.execute(); - DatabaseCatalog::instance().addDependency(select_table_id, table_id); + DatabaseCatalog::instance().addViewDependency(select_table_id, table_id); shutdown_called = false; @@ -1566,7 +1566,7 @@ void StorageWindowView::writeIntoWindowView( void StorageWindowView::startup() { - DatabaseCatalog::instance().addDependency(select_table_id, getStorageID()); + DatabaseCatalog::instance().addViewDependency(select_table_id, getStorageID()); fire_task->activate(); clean_cache_task->activate(); @@ -1586,17 +1586,17 @@ void StorageWindowView::shutdown() fire_task->deactivate(); auto table_id = getStorageID(); - 
DatabaseCatalog::instance().removeDependency(select_table_id, table_id); + DatabaseCatalog::instance().removeViewDependency(select_table_id, table_id); } void StorageWindowView::checkTableCanBeDropped() const { auto table_id = getStorageID(); - Dependencies dependencies = DatabaseCatalog::instance().getDependencies(table_id); - if (!dependencies.empty()) + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + if (!view_ids.empty()) { - StorageID dependent_table_id = dependencies.front(); - throw Exception("Table has dependency " + dependent_table_id.getNameForLogs(), ErrorCodes::TABLE_WAS_NOT_DROPPED); + StorageID view_id = *view_ids.begin(); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table has dependency {}", view_id); } } diff --git a/src/Storages/tests/gtest_named_collections.cpp b/src/Storages/tests/gtest_named_collections.cpp index 5ba9156bcd9..369e8ec44f6 100644 --- a/src/Storages/tests/gtest_named_collections.cpp +++ b/src/Storages/tests/gtest_named_collections.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -28,7 +29,7 @@ TEST(NamedCollections, SimpleConfig) Poco::AutoPtr document = dom_parser.parseString(xml); Poco::AutoPtr config = new Poco::Util::XMLConfiguration(document); - NamedCollectionFactory::instance().initialize(*config); + NamedCollectionUtils::loadFromConfig(*config); ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection1")); ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection2")); @@ -76,16 +77,16 @@ key5: 5 key6: 6.6 )CONFIG"); - collection2_copy->set("key4", "value44", true); - ASSERT_TRUE(collection2_copy->get("key4") == "value44"); - ASSERT_TRUE(collection2->get("key4") == "value4"); + collection2_copy->setOrUpdate("key4", "value44"); + ASSERT_EQ(collection2_copy->get("key4"), "value44"); + ASSERT_EQ(collection2->get("key4"), "value4"); collection2_copy->remove("key4"); - ASSERT_TRUE(collection2_copy->getOrDefault("key4", "N") == "N"); - ASSERT_TRUE(collection2->getOrDefault("key4", "N") == "value4"); + ASSERT_EQ(collection2_copy->getOrDefault("key4", "N"), "N"); + ASSERT_EQ(collection2->getOrDefault("key4", "N"), "value4"); - collection2_copy->set("key4", "value45"); - ASSERT_TRUE(collection2_copy->getOrDefault("key4", "N") == "value45"); + collection2_copy->setOrUpdate("key4", "value45"); + ASSERT_EQ(collection2_copy->getOrDefault("key4", "N"), "value45"); NamedCollectionFactory::instance().remove("collection2_copy"); ASSERT_FALSE(NamedCollectionFactory::instance().exists("collection2_copy")); @@ -97,7 +98,7 @@ TEST(NamedCollections, NestedConfig) { std::string xml(R"CONFIG( - + value1 @@ -110,21 +111,22 @@ TEST(NamedCollections, NestedConfig) - + )CONFIG"); Poco::XML::DOMParser dom_parser; Poco::AutoPtr document = dom_parser.parseString(xml); Poco::AutoPtr config = new Poco::Util::XMLConfiguration(document); - NamedCollectionFactory::instance().reload(*config); - ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection1")); + NamedCollectionUtils::loadFromConfig(*config); - auto collection1 = NamedCollectionFactory::instance().get("collection1"); - ASSERT_TRUE(collection1 != nullptr); + ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection3")); - ASSERT_EQ(collection1->dumpStructure(), + auto collection = NamedCollectionFactory::instance().get("collection3"); + ASSERT_TRUE(collection != nullptr); + + ASSERT_EQ(collection->dumpStructure(), R"CONFIG(key1: key1_1: value1 key2: @@ -135,9 +137,9 @@ key2: key2_5: 5 )CONFIG"); - 
ASSERT_EQ(collection1->get("key1.key1_1"), "value1"); - ASSERT_EQ(collection1->get("key2.key2_1"), "value2_1"); - ASSERT_EQ(collection1->get("key2.key2_2.key2_3.key2_4"), 4); - ASSERT_EQ(collection1->get("key2.key2_2.key2_3.key2_5"), 5); + ASSERT_EQ(collection->get("key1.key1_1"), "value1"); + ASSERT_EQ(collection->get("key2.key2_1"), "value2_1"); + ASSERT_EQ(collection->get("key2.key2_2.key2_3.key2_4"), 4); + ASSERT_EQ(collection->get("key2.key2_2.key2_3.key2_5"), 5); } diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini new file mode 100644 index 00000000000..7326675067c --- /dev/null +++ b/tests/ci/.mypy.ini @@ -0,0 +1,16 @@ +[mypy] +warn_no_return = False +warn_unused_configs = True +disallow_subclassing_any = True +disallow_untyped_calls = False +disallow_untyped_defs = False +disallow_incomplete_defs = True +check_untyped_defs = True +disallow_untyped_decorators = True +no_implicit_optional = True +warn_redundant_casts = True +warn_unused_ignores = True +warn_return_any = True +no_implicit_reexport = True +strict_equality = True +strict_concatenate = True diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index e5f37f2940b..e5215fc4c42 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -41,7 +41,7 @@ def process_result(file_path): if is_ok and report_url == "null": return is_ok, None - status = f'OK: Bug reproduced (Report' + status = f'OK: Bug reproduced (Report)' if not is_ok: status = f'Bug is not reproduced (Report)' test_results.append([f"{prefix}: {description}", status]) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index d668dbe0498..c9e8dac2c00 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -121,7 +121,7 @@ def check_for_success_run( s3_prefix: str, build_name: str, build_config: BuildConfig, -): +) -> None: logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix) logging.info("Checking for artifacts in %s", logged_prefix) try: @@ -174,7 +174,7 @@ def create_json_artifact( build_config: BuildConfig, elapsed: int, success: bool, -): +) -> None: subprocess.check_call( f"echo 'BUILD_URLS=build_urls_{build_name}' >> $GITHUB_ENV", shell=True ) @@ -218,7 +218,7 @@ def upload_master_static_binaries( build_config: BuildConfig, s3_helper: S3Helper, build_output_path: str, -): +) -> None: """Upload binary artifacts to a static S3 links""" static_binary_name = build_config.get("static_binary_name", False) if pr_info.number != 0: diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 58997bed253..1a2fdedefed 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -5,7 +5,7 @@ import logging import os import sys import time -from typing import List, Optional +from typing import Any, List, Optional import requests # type: ignore @@ -18,7 +18,7 @@ def get_with_retries( url: str, retries: int = DOWNLOAD_RETRIES_COUNT, sleep: int = 3, - **kwargs, + **kwargs: Any, ) -> requests.Response: logging.info( "Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url @@ -41,18 +41,18 @@ def get_with_retries( return response -def get_build_name_for_check(check_name) -> str: - return CI_CONFIG["tests_config"][check_name]["required_build"] +def get_build_name_for_check(check_name: str) -> str: + return CI_CONFIG["tests_config"][check_name]["required_build"] # type: ignore -def read_build_urls(build_name, reports_path) -> List[str]: +def read_build_urls(build_name: str, reports_path: str) -> List[str]: for 
root, _, files in os.walk(reports_path): for f in files: if build_name in f: logging.info("Found build report json %s", f) with open(os.path.join(root, f), "r", encoding="utf-8") as file_handler: build_report = json.load(file_handler) - return build_report["build_urls"] + return build_report["build_urls"] # type: ignore return [] diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 673b0204864..03e18d7766e 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -19,7 +19,7 @@ from env_helper import ( from report import create_build_html_report from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo +from pr_info import NeedsDataType, PRInfo from commit_status_helper import ( get_commit, update_mergeable_check, @@ -28,7 +28,7 @@ from ci_config import CI_CONFIG from rerun_helper import RerunHelper -NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH") +NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "") class BuildResult: @@ -98,7 +98,7 @@ def get_failed_report( def process_report( - build_report, + build_report: dict, ) -> Tuple[List[BuildResult], List[List[str]], List[str]]: build_config = build_report["build_config"] build_result = BuildResult( @@ -144,16 +144,14 @@ def main(): os.makedirs(temp_path) build_check_name = sys.argv[1] - needs_data = None + needs_data = {} # type: NeedsDataType required_builds = 0 if os.path.exists(NEEDS_DATA_PATH): with open(NEEDS_DATA_PATH, "rb") as file_handler: needs_data = json.load(file_handler) required_builds = len(needs_data) - if needs_data is not None and all( - i["result"] == "skipped" for i in needs_data.values() - ): + if needs_data and all(i["result"] == "skipped" for i in needs_data.values()): logging.info("All builds are skipped, exiting") sys.exit(0) @@ -218,19 +216,21 @@ def main(): build_logs = [] for build_report in build_reports: - build_result, build_artifacts_url, build_logs_url = process_report(build_report) - logging.info( - "Got %s artifact groups for build report report", len(build_result) + _build_results, build_artifacts_url, build_logs_url = process_report( + build_report ) - build_results.extend(build_result) + logging.info( + "Got %s artifact groups for build report report", len(_build_results) + ) + build_results.extend(_build_results) build_artifacts.extend(build_artifacts_url) build_logs.extend(build_logs_url) for failed_job in missing_build_names: - build_result, build_artifacts_url, build_logs_url = get_failed_report( + _build_results, build_artifacts_url, build_logs_url = get_failed_report( failed_job ) - build_results.extend(build_result) + build_results.extend(_build_results) build_artifacts.extend(build_artifacts_url) build_logs.extend(build_logs_url) diff --git a/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.reference b/tests/ci/cancel_and_rerun_workflow_lambda/__init__.py similarity index 100% rename from tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.reference rename to tests/ci/cancel_and_rerun_workflow_lambda/__init__.py diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index ebdfe2fdb5b..eeda7feb985 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -13,14 +13,16 @@ import jwt import requests # type: ignore import boto3 # type: ignore -PULL_REQUEST_CI = "PullRequestCI" + +NEED_RERUN_ON_EDITED = { + "PullRequestCI", + "DocsCheck", +} 
NEED_RERUN_OR_CANCELL_WORKFLOWS = { - PULL_REQUEST_CI, - "DocsCheck", "DocsReleaseChecks", "BackportPR", -} +}.union(NEED_RERUN_ON_EDITED) MAX_RETRY = 5 @@ -106,7 +108,7 @@ def _exec_get_with_retry(url: str, token: str) -> dict: try: response = requests.get(url, headers=headers) response.raise_for_status() - return response.json() + return response.json() # type: ignore except Exception as ex: print("Got exception executing request", ex) time.sleep(i + 1) @@ -130,8 +132,7 @@ WorkflowDescription = namedtuple( def get_workflows_description_for_pull_request( - pull_request_event, - token, + pull_request_event: dict, token: str ) -> List[WorkflowDescription]: head_repo = pull_request_event["head"]["repo"]["full_name"] head_branch = pull_request_event["head"]["ref"] @@ -193,7 +194,7 @@ def get_workflows_description_for_pull_request( def get_workflow_description_fallback( - pull_request_event, token + pull_request_event: dict, token: str ) -> List[WorkflowDescription]: head_repo = pull_request_event["head"]["repo"]["full_name"] head_branch = pull_request_event["head"]["ref"] @@ -241,7 +242,7 @@ def get_workflow_description_fallback( WorkflowDescription( url=wf["url"], run_id=wf["id"], - name=workflow["name"], + name=wf["name"], head_sha=wf["head_sha"], status=wf["status"], rerun_url=wf["rerun_url"], @@ -254,7 +255,7 @@ def get_workflow_description_fallback( return workflow_descriptions -def get_workflow_description(workflow_url, token) -> WorkflowDescription: +def get_workflow_description(workflow_url: str, token: str) -> WorkflowDescription: workflow = _exec_get_with_retry(workflow_url, token) return WorkflowDescription( url=workflow["url"], @@ -331,11 +332,11 @@ def main(event): workflow_descriptions or get_workflow_description_fallback(pull_request, token) ) - workflow_descriptions.sort(key=lambda x: x.run_id) + workflow_descriptions.sort(key=lambda x: x.run_id) # type: ignore most_recent_workflow = workflow_descriptions[-1] if ( most_recent_workflow.status == "completed" - and most_recent_workflow.name == PULL_REQUEST_CI + and most_recent_workflow.name in NEED_RERUN_ON_EDITED ): print( "The PR's body is changed and workflow is finished. 
" @@ -376,7 +377,7 @@ def main(event): print("Not found any workflows") return - workflow_descriptions.sort(key=lambda x: x.run_id) + workflow_descriptions.sort(key=lambda x: x.run_id) # type: ignore most_recent_workflow = workflow_descriptions[-1] print("Latest workflow", most_recent_workflow) if ( diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index d1c9d3d394c..b3e90feef2a 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -79,7 +79,7 @@ Merge it only if you intend to backport changes to the target branch, otherwise self.backport_pr = None # type: Optional[PullRequest] self._backported = None # type: Optional[bool] self.git_prefix = ( # All commits to cherrypick are done as robot-clickhouse - "git -c user.email=robot-clickhouse@clickhouse.com " + "git -c user.email=robot-clickhouse@users.noreply.github.com " "-c user.name=robot-clickhouse -c commit.gpgsign=false" ) self.pre_check() @@ -92,7 +92,8 @@ Merge it only if you intend to backport changes to the target branch, otherwise if branch_updated: self._backported = True - def pop_prs(self, prs: PullRequests): + def pop_prs(self, prs: PullRequests) -> None: + """the method processes all prs and pops the ReleaseBranch related prs""" to_pop = [] # type: List[int] for i, pr in enumerate(prs): if self.name not in pr.head.ref: @@ -105,14 +106,14 @@ Merge it only if you intend to backport changes to the target branch, otherwise to_pop.append(i) else: logging.error( - "PR #%s doesn't head ref starting with known suffix", + "head ref of PR #%s isn't starting with known suffix", pr.number, ) for i in reversed(to_pop): # Going from the tail to keep the order and pop greater index first prs.pop(i) - def process(self, dry_run: bool): + def process(self, dry_run: bool) -> None: if self.backported: return if not self.cherrypick_pr: @@ -209,6 +210,7 @@ Merge it only if you intend to backport changes to the target branch, otherwise self._assign_new_pr(self.cherrypick_pr) def create_backport(self): + assert self.cherrypick_pr is not None # Checkout the backport branch from the remote and make all changes to # apply like they are only one cherry-pick commit on top of release git_runner(f"{self.git_prefix} checkout -f {self.backport_branch}") @@ -239,7 +241,7 @@ Merge it only if you intend to backport changes to the target branch, otherwise self.backport_pr.add_to_labels(Labels.BACKPORT) self._assign_new_pr(self.backport_pr) - def _assign_new_pr(self, new_pr: PullRequest): + def _assign_new_pr(self, new_pr: PullRequest) -> None: """Assign `new_pr` to author, merger and assignees of an original PR""" # It looks there some race when multiple .add_to_assignees are executed, # so we'll add all at once @@ -340,7 +342,7 @@ class Backport: ) self.error = e - def process_pr(self, pr: PullRequest): + def process_pr(self, pr: PullRequest) -> None: pr_labels = [label.name for label in pr.labels] if Labels.MUST_BACKPORT in pr_labels: branches = [ @@ -403,7 +405,7 @@ class Backport: # And check it after the running self.mark_pr_backported(pr) - def mark_pr_backported(self, pr: PullRequest): + def mark_pr_backported(self, pr: PullRequest) -> None: if self.dry_run: logging.info("DRY RUN: would mark PR #%s as done", pr.number) return @@ -488,7 +490,8 @@ def main(): gh = GitHub(token, per_page=100) bp = Backport(gh, args.repo, args.dry_run) - bp.gh.cache_path = str(f"{TEMP_PATH}/gh_cache") + # https://github.com/python/mypy/issues/3004 + bp.gh.cache_path = f"{TEMP_PATH}/gh_cache" # type: ignore bp.receive_release_prs() 
bp.receive_prs_for_backport() bp.process_backports() diff --git a/tests/ci/ci_runners_metrics_lambda/app.py b/tests/ci/ci_runners_metrics_lambda/app.py index c1b20beb599..2bc568bb462 100644 --- a/tests/ci/ci_runners_metrics_lambda/app.py +++ b/tests/ci/ci_runners_metrics_lambda/app.py @@ -12,11 +12,12 @@ import json import time from collections import namedtuple from datetime import datetime +from typing import Dict, List, Tuple import jwt -import requests -import boto3 -from botocore.exceptions import ClientError +import requests # type: ignore +import boto3 # type: ignore +from botocore.exceptions import ClientError # type: ignore UNIVERSAL_LABEL = "universal" RUNNER_TYPE_LABELS = [ @@ -29,8 +30,13 @@ RUNNER_TYPE_LABELS = [ "style-checker-aarch64", ] +RunnerDescription = namedtuple( + "RunnerDescription", ["id", "name", "tags", "offline", "busy"] +) +RunnerDescriptions = List[RunnerDescription] -def get_dead_runners_in_ec2(runners): + +def get_dead_runners_in_ec2(runners: RunnerDescriptions) -> RunnerDescriptions: ids = { runner.name: runner for runner in runners @@ -92,7 +98,7 @@ def get_dead_runners_in_ec2(runners): return result_to_delete -def get_lost_ec2_instances(runners): +def get_lost_ec2_instances(runners: RunnerDescriptions) -> List[dict]: client = boto3.client("ec2") reservations = client.describe_instances( Filters=[{"Name": "tag-key", "Values": ["github:runner-type"]}] @@ -130,7 +136,7 @@ def get_lost_ec2_instances(runners): return lost_instances -def get_key_and_app_from_aws(): +def get_key_and_app_from_aws() -> Tuple[str, int]: secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( @@ -146,7 +152,7 @@ def handler(event, context): main(private_key, app_id, True, True) -def get_installation_id(jwt_token): +def get_installation_id(jwt_token: str) -> int: headers = { "Authorization": f"Bearer {jwt_token}", "Accept": "application/vnd.github.v3+json", @@ -157,10 +163,12 @@ def get_installation_id(jwt_token): for installation in data: if installation["account"]["login"] == "ClickHouse": installation_id = installation["id"] - return installation_id + break + + return installation_id # type: ignore -def get_access_token(jwt_token, installation_id): +def get_access_token(jwt_token: str, installation_id: int) -> str: headers = { "Authorization": f"Bearer {jwt_token}", "Accept": "application/vnd.github.v3+json", @@ -171,15 +179,10 @@ def get_access_token(jwt_token, installation_id): ) response.raise_for_status() data = response.json() - return data["token"] + return data["token"] # type: ignore -RunnerDescription = namedtuple( - "RunnerDescription", ["id", "name", "tags", "offline", "busy"] -) - - -def list_runners(access_token): +def list_runners(access_token: str) -> RunnerDescriptions: headers = { "Authorization": f"token {access_token}", "Accept": "application/vnd.github.v3+json", @@ -225,8 +228,10 @@ def list_runners(access_token): return result -def group_runners_by_tag(listed_runners): - result = {} +def group_runners_by_tag( + listed_runners: RunnerDescriptions, +) -> Dict[str, RunnerDescriptions]: + result = {} # type: Dict[str, RunnerDescriptions] def add_to_result(tag, runner): if tag not in result: @@ -248,7 +253,9 @@ def group_runners_by_tag(listed_runners): return result -def push_metrics_to_cloudwatch(listed_runners, namespace): +def push_metrics_to_cloudwatch( + listed_runners: RunnerDescriptions, namespace: str +) -> None: client = boto3.client("cloudwatch") metrics_data = [] busy_runners = sum( @@ -278,7 +285,7 @@ def 
push_metrics_to_cloudwatch(listed_runners, namespace): } ) if total_active_runners == 0: - busy_ratio = 100 + busy_ratio = 100.0 else: busy_ratio = busy_runners / total_active_runners * 100 @@ -293,7 +300,7 @@ def push_metrics_to_cloudwatch(listed_runners, namespace): client.put_metric_data(Namespace=namespace, MetricData=metrics_data) -def delete_runner(access_token, runner): +def delete_runner(access_token: str, runner: RunnerDescription) -> bool: headers = { "Authorization": f"token {access_token}", "Accept": "application/vnd.github.v3+json", @@ -305,10 +312,15 @@ def delete_runner(access_token, runner): ) response.raise_for_status() print(f"Response code deleting {runner.name} is {response.status_code}") - return response.status_code == 204 + return bool(response.status_code == 204) -def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_runners): +def main( + github_secret_key: str, + github_app_id: int, + push_to_cloudwatch: bool, + delete_offline_runners: bool, +) -> None: payload = { "iat": int(time.time()) - 60, "exp": int(time.time()) + (10 * 60), diff --git a/tests/ci/codebrowser_check.py b/tests/ci/codebrowser_check.py index 97036c6fc7b..412bcdf8818 100644 --- a/tests/ci/codebrowser_check.py +++ b/tests/ci/codebrowser_check.py @@ -7,14 +7,21 @@ import logging from github import Github -from env_helper import IMAGES_PATH, REPO_COPY, S3_TEST_REPORTS_BUCKET, S3_DOWNLOAD -from stopwatch import Stopwatch -from upload_result_helper import upload_results -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token +from env_helper import ( + IMAGES_PATH, + REPO_COPY, + S3_DOWNLOAD, + S3_TEST_REPORTS_BUCKET, + TEMP_PATH, +) from commit_status_helper import post_commit_status from docker_pull_helper import get_image_with_version +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results NAME = "Woboq Build" @@ -33,17 +40,16 @@ if __name__ == "__main__": stopwatch = Stopwatch() - temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) - gh = Github(get_best_robot_token(), per_page=100) + pr_info = PRInfo() - if not os.path.exists(temp_path): - os.makedirs(temp_path) + if not os.path.exists(TEMP_PATH): + os.makedirs(TEMP_PATH) docker_image = get_image_with_version(IMAGES_PATH, "clickhouse/codebrowser") s3_helper = S3Helper() - result_path = os.path.join(temp_path, "result_path") + result_path = os.path.join(TEMP_PATH, "result_path") if not os.path.exists(result_path): os.makedirs(result_path) @@ -51,7 +57,7 @@ if __name__ == "__main__": logging.info("Going to run codebrowser: %s", run_command) - run_log_path = os.path.join(temp_path, "runlog.log") + run_log_path = os.path.join(TEMP_PATH, "runlog.log") with TeePopen(run_command, run_log_path) as process: retcode = process.wait() @@ -60,7 +66,7 @@ if __name__ == "__main__": else: logging.info("Run failed") - subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {TEMP_PATH}", shell=True) report_path = os.path.join(result_path, "html_report") logging.info("Report path %s", report_path) @@ -76,12 +82,8 @@ if __name__ == "__main__": test_results = [(index_html, "Look at the report")] - report_url = upload_results( - s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME - ) + report_url = upload_results(s3_helper, 0, pr_info.sha, test_results, [], NAME) 
print(f"::notice ::Report url: {report_url}") - post_commit_status( - gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url - ) + post_commit_status(gh, pr_info.sha, NAME, "Report built", "success", report_url) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 185dc64daa9..785250c3904 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -17,7 +17,7 @@ RETRY = 5 CommitStatuses = List[CommitStatus] -def override_status(status: str, check_name: str, invert=False) -> str: +def override_status(status: str, check_name: str, invert: bool = False) -> str: if CI_CONFIG["tests_config"].get(check_name, {}).get("force_tests", False): return "success" @@ -45,7 +45,7 @@ def get_commit(gh: Github, commit_sha: str, retry_count: int = RETRY) -> Commit: def post_commit_status( gh: Github, sha: str, check_name: str, description: str, state: str, report_url: str -): +) -> None: for i in range(RETRY): try: commit = get_commit(gh, sha, 1) @@ -64,7 +64,7 @@ def post_commit_status( def post_commit_status_to_file( file_path: str, description: str, state: str, report_url: str -): +) -> None: if os.path.exists(file_path): raise Exception(f'File "{file_path}" already exists!') with open(file_path, "w", encoding="utf-8") as f: @@ -88,21 +88,21 @@ def get_commit_filtered_statuses(commit: Commit) -> CommitStatuses: return list(filtered.values()) -def remove_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]): +def remove_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None: repo = gh.get_repo(GITHUB_REPOSITORY) pull_request = repo.get_pull(pr_info.number) for label in labels_names: pull_request.remove_from_labels(label) -def post_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]): +def post_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None: repo = gh.get_repo(GITHUB_REPOSITORY) pull_request = repo.get_pull(pr_info.number) for label in labels_names: pull_request.add_to_labels(label) -def fail_mergeable_check(commit: Commit, description: str): +def fail_mergeable_check(commit: Commit, description: str) -> None: commit.create_status( context="Mergeable Check", description=description, @@ -111,7 +111,7 @@ def fail_mergeable_check(commit: Commit, description: str): ) -def reset_mergeable_check(commit: Commit, description: str = ""): +def reset_mergeable_check(commit: Commit, description: str = "") -> None: commit.create_status( context="Mergeable Check", description=description, @@ -120,7 +120,7 @@ def reset_mergeable_check(commit: Commit, description: str = ""): ) -def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str): +def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None: if SKIP_MERGEABLE_CHECK_LABEL in pr_info.labels: return diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 873aee9aabf..0618969f94c 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,7 +8,7 @@ import shutil import subprocess import time import sys -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union from github import Github @@ -52,7 +52,7 @@ class DockerImage: and self.only_amd64 == other.only_amd64 ) - def __lt__(self, other) -> bool: + def __lt__(self, other: Any) -> bool: if not isinstance(other, DockerImage): return False if self.parent and not other.parent: @@ -270,7 +270,7 @@ def build_and_push_one_image( def 
process_single_image( image: DockerImage, versions: List[str], - additional_cache, + additional_cache: str, push: bool, child: bool, ) -> List[Tuple[str, str, str]]: diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 09b7a99da78..2ba5a99de0a 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -70,7 +70,7 @@ def parse_args() -> argparse.Namespace: def load_images(path: str, suffix: str) -> Images: with open(os.path.join(path, CHANGED_IMAGES.format(suffix)), "rb") as images: - return json.load(images) + return json.load(images) # type: ignore def strip_suffix(suffix: str, images: Images) -> Images: diff --git a/tests/ci/docker_pull_helper.py b/tests/ci/docker_pull_helper.py index 04817ed7de3..5336966b3eb 100644 --- a/tests/ci/docker_pull_helper.py +++ b/tests/ci/docker_pull_helper.py @@ -6,11 +6,11 @@ import time import subprocess import logging -from typing import Optional +from typing import List, Optional class DockerImage: - def __init__(self, name, version: Optional[str] = None): + def __init__(self, name: str, version: Optional[str] = None): self.name = name if version is None: self.version = "latest" @@ -22,8 +22,11 @@ class DockerImage: def get_images_with_versions( - reports_path, required_image, pull=True, version: Optional[str] = None -): + reports_path: str, + required_images: List[str], + pull: bool = True, + version: Optional[str] = None, +) -> List[DockerImage]: images_path = None for root, _, files in os.walk(reports_path): for f in files: @@ -45,12 +48,13 @@ def get_images_with_versions( images = {} docker_images = [] - for image_name in required_image: + for image_name in required_images: docker_image = DockerImage(image_name, version) if image_name in images: docker_image.version = images[image_name] docker_images.append(docker_image) + latest_error = Exception("predefined to avoid access before created") if pull: for docker_image in docker_images: for i in range(10): @@ -75,6 +79,8 @@ def get_images_with_versions( return docker_images -def get_image_with_version(reports_path, image, pull=True, version=None): +def get_image_with_version( + reports_path: str, image: str, pull: bool = True, version: Optional[str] = None +) -> DockerImage: logging.info("Looking for images file in %s", reports_path) return get_images_with_versions(reports_path, [image], pull, version=version)[0] diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 1848300e2f6..8b18a580ed7 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -43,55 +43,55 @@ class TestDockerImageCheck(unittest.TestCase): "docker/test/stateless", "clickhouse/stateless-test", False, - "clickhouse/test-base", + "clickhouse/test-base", # type: ignore ), di.DockerImage( "docker/test/integration/base", "clickhouse/integration-test", False, - "clickhouse/test-base", + "clickhouse/test-base", # type: ignore ), di.DockerImage( "docker/test/fuzzer", "clickhouse/fuzzer", False, - "clickhouse/test-base", + "clickhouse/test-base", # type: ignore ), di.DockerImage( "docker/test/keeper-jepsen", "clickhouse/keeper-jepsen-test", False, - "clickhouse/test-base", + "clickhouse/test-base", # type: ignore ), di.DockerImage( "docker/docs/check", "clickhouse/docs-check", False, - "clickhouse/docs-builder", + "clickhouse/docs-builder", # type: ignore ), di.DockerImage( "docker/docs/release", "clickhouse/docs-release", False, - "clickhouse/docs-builder", + "clickhouse/docs-builder", # type: ignore ), di.DockerImage( "docker/test/stateful", 
"clickhouse/stateful-test", False, - "clickhouse/stateless-test", + "clickhouse/stateless-test", # type: ignore ), di.DockerImage( "docker/test/unit", "clickhouse/unit-test", False, - "clickhouse/stateless-test", + "clickhouse/stateless-test", # type: ignore ), di.DockerImage( "docker/test/stress", "clickhouse/stress-test", False, - "clickhouse/stateful-test", + "clickhouse/stateful-test", # type: ignore ), ] ) @@ -277,7 +277,7 @@ class TestDockerServer(unittest.TestCase): ds.gen_tags(version, "auto") @patch("docker_server.get_tagged_versions") - def test_auto_release_type(self, mock_tagged_versions: MagicMock): + def test_auto_release_type(self, mock_tagged_versions: MagicMock) -> None: mock_tagged_versions.return_value = [ get_version_from_string("1.1.1.1"), get_version_from_string("1.2.1.1"), diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 03e42726808..2a6a0d5fa57 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -6,6 +6,7 @@ import os import csv import sys import atexit +from typing import List, Tuple from github import Github @@ -50,8 +51,10 @@ def get_fasttest_cmd( ) -def process_results(result_folder): - test_results = [] +def process_results( + result_folder: str, +) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: + test_results = [] # type: List[Tuple[str, str]] additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of @@ -78,7 +81,7 @@ def process_results(result_folder): results_path = os.path.join(result_folder, "test_results.tsv") if os.path.exists(results_path): with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) + test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -172,7 +175,7 @@ if __name__ == "__main__": "test_log.txt" in test_output_files or "test_result.txt" in test_output_files ) test_result_exists = "test_results.tsv" in test_output_files - test_results = [] + test_results = [] # type: List[Tuple[str, str]] if "submodule_log.txt" not in test_output_files: description = "Cannot clone repository" state = "failure" diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index a0b7f14ecfb..ea2f5eb3136 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -5,27 +5,11 @@ from github import Github from env_helper import GITHUB_RUN_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token -from commit_status_helper import get_commit +from commit_status_helper import get_commit, get_commit_filtered_statuses NAME = "Run Check" -def filter_statuses(statuses): - """ - Squash statuses to latest state - 1. context="first", state="success", update_time=1 - 2. context="second", state="success", update_time=2 - 3. context="first", stat="failure", update_time=3 - =========> - 1. context="second", state="success" - 2. 
context="first", stat="failure" - """ - filt = {} - for status in sorted(statuses, key=lambda x: x.updated_at): - filt[status.context] = status - return filt - - if __name__ == "__main__": logging.basicConfig(level=logging.INFO) @@ -34,8 +18,13 @@ if __name__ == "__main__": commit = get_commit(gh, pr_info.sha) url = GITHUB_RUN_URL - statuses = filter_statuses(list(commit.get_statuses())) - if NAME in statuses and statuses[NAME].state == "pending": + statuses = get_commit_filtered_statuses(commit) + pending_status = any( # find NAME status in pending state + True + for status in statuses + if status.context == NAME and status.state == "pending" + ) + if pending_status: commit.create_status( context=NAME, description="All checks finished", diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index f7d3288c316..87833d688af 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -7,6 +7,7 @@ import os import subprocess import sys import atexit +from typing import List, Tuple from github import Github @@ -122,8 +123,11 @@ def get_tests_to_run(pr_info): return list(result) -def process_results(result_folder, server_log_path): - test_results = [] +def process_results( + result_folder: str, + server_log_path: str, +) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: + test_results = [] # type: List[Tuple[str, str]] additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of result_folder. @@ -166,7 +170,7 @@ def process_results(result_folder, server_log_path): return "error", "Not found test_results.tsv", test_results, additional_files with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) + test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -232,8 +236,8 @@ if __name__ == "__main__": sys.exit(0) if "RUN_BY_HASH_NUM" in os.environ: - run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM")) - run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL")) + run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) + run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0")) check_name_with_group = ( check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" ) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index bfce69a17d9..b9ad51379d2 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -3,7 +3,7 @@ import re import logging -import requests +import requests # type: ignore CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" VERSION_PATTERN = r"(v(?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 163e1ce071e..6ecaf468ed1 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -1,8 +1,17 @@ #!/usr/bin/env python3 import logging +from dataclasses import dataclass import boto3 # type: ignore -from github import Github # type: ignore +from github import Github +from github.AuthenticatedUser import AuthenticatedUser + + +@dataclass +class Token: + user: AuthenticatedUser + value: str + rest: int def get_parameter_from_ssm(name, decrypt=True, client=None): @@ -19,7 +28,7 @@ def get_best_robot_token(token_prefix_env_name="github_robot_token_"): ] )["Parameters"] assert 
parameters - token = {"login": "", "value": "", "rest": 0} + token = None for token_name in [p["Name"] for p in parameters]: value = get_parameter_from_ssm(token_name, True, client) @@ -29,12 +38,15 @@ def get_best_robot_token(token_prefix_env_name="github_robot_token_"): user = gh.get_user() rest, _ = gh.rate_limiting logging.info("Get token with %s remaining requests", rest) - if token["rest"] < rest: - token = {"user": user, "value": value, "rest": rest} + if token is None: + token = Token(user, value, rest) + continue + if token.rest < rest: + token.user, token.value, token.rest = user, value, rest - assert token["value"] + assert token logging.info( - "User %s with %s remaining requests is used", token["user"].login, token["rest"] + "User %s with %s remaining requests is used", token.user.login, token.rest ) - return token["value"] + return token.value diff --git a/tests/ci/git_helper.py b/tests/ci/git_helper.py index 77c2fc9cf05..eb5e835eab3 100644 --- a/tests/ci/git_helper.py +++ b/tests/ci/git_helper.py @@ -4,7 +4,7 @@ import logging import os.path as p import re import subprocess -from typing import List, Optional +from typing import Any, List, Optional logger = logging.getLogger(__name__) @@ -21,19 +21,19 @@ TWEAK = 1 # Py 3.8 removeprefix and removesuffix -def removeprefix(string: str, prefix: str): +def removeprefix(string: str, prefix: str) -> str: if string.startswith(prefix): return string[len(prefix) :] # noqa: ignore E203, false positive return string -def removesuffix(string: str, suffix: str): +def removesuffix(string: str, suffix: str) -> str: if string.endswith(suffix): return string[: -len(suffix)] return string -def commit(name: str): +def commit(name: str) -> str: r = re.compile(SHA_REGEXP) if not r.match(name): raise argparse.ArgumentTypeError( @@ -42,7 +42,7 @@ def commit(name: str): return name -def release_branch(name: str): +def release_branch(name: str) -> str: r = re.compile(RELEASE_BRANCH_REGEXP) if not r.match(name): raise argparse.ArgumentTypeError("release branch should be as 12.1") @@ -55,20 +55,23 @@ class Runner: def __init__(self, cwd: str = CWD): self._cwd = cwd - def run(self, cmd: str, cwd: Optional[str] = None, **kwargs) -> str: + def run(self, cmd: str, cwd: Optional[str] = None, **kwargs: Any) -> str: if cwd is None: cwd = self.cwd logger.debug("Running command: %s", cmd) - return subprocess.check_output( - cmd, shell=True, cwd=cwd, encoding="utf-8", **kwargs - ).strip() + output = str( + subprocess.check_output( + cmd, shell=True, cwd=cwd, encoding="utf-8", **kwargs + ).strip() + ) + return output @property def cwd(self) -> str: return self._cwd @cwd.setter - def cwd(self, value: str): + def cwd(self, value: str) -> None: # Set _cwd only once, then set it to readonly if self._cwd != CWD: return @@ -139,7 +142,7 @@ class Git: ) @staticmethod - def check_tag(value: str): + def check_tag(value: str) -> None: if value == "": return if not Git._tag_pattern.match(value): @@ -150,7 +153,7 @@ class Git: return self._latest_tag @latest_tag.setter - def latest_tag(self, value: str): + def latest_tag(self, value: str) -> None: self.check_tag(value) self._latest_tag = value @@ -159,7 +162,7 @@ class Git: return self._new_tag @new_tag.setter - def new_tag(self, value: str): + def new_tag(self, value: str) -> None: self.check_tag(value) self._new_tag = value diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index 685d9f2c841..bd740827b34 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -8,11 +8,18 @@ from time import 
sleep from typing import List, Optional, Tuple import github -from github.GithubException import RateLimitExceededException -from github.Issue import Issue -from github.NamedUser import NamedUser -from github.PullRequest import PullRequest -from github.Repository import Repository + +# explicit reimport +# pylint: disable=useless-import-alias +from github.GithubException import ( + RateLimitExceededException as RateLimitExceededException, +) +from github.Issue import Issue as Issue +from github.NamedUser import NamedUser as NamedUser +from github.PullRequest import PullRequest as PullRequest +from github.Repository import Repository as Repository + +# pylint: enable=useless-import-alias CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache") @@ -90,7 +97,7 @@ class GitHub(github.Github): raise exception # pylint: enable=signature-differs - def get_pulls_from_search(self, *args, **kwargs) -> PullRequests: + def get_pulls_from_search(self, *args, **kwargs) -> PullRequests: # type: ignore """The search api returns actually issues, so we need to fetch PullRequests""" issues = self.search_issues(*args, **kwargs) repos = {} @@ -168,7 +175,7 @@ class GitHub(github.Github): self.dump(user, prfd) # type: ignore return user - def _get_cached(self, path: Path): + def _get_cached(self, path: Path): # type: ignore with open(path, "rb") as ob_fd: return self.load(ob_fd) # type: ignore @@ -190,11 +197,11 @@ class GitHub(github.Github): return False, cached_obj @property - def cache_path(self): + def cache_path(self) -> Path: return self._cache_path @cache_path.setter - def cache_path(self, value: str): + def cache_path(self, value: str) -> None: self._cache_path = Path(value) if self._cache_path.exists(): assert self._cache_path.is_dir() @@ -208,5 +215,6 @@ class GitHub(github.Github): return self._retries @retries.setter - def retries(self, value: int): + def retries(self, value: int) -> None: + assert isinstance(value, int) self._retries = value diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index cba428cbcf5..e61117a4b45 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -7,6 +7,7 @@ import logging import os import subprocess import sys +from typing import List, Tuple from github import Github @@ -87,8 +88,10 @@ def get_env_for_runner(build_path, repo_path, result_path, work_path): return my_env -def process_results(result_folder): - test_results = [] +def process_results( + result_folder: str, +) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: + test_results = [] # type: List[Tuple[str, str]] additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of result_folder. 
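
The hunk above, together with the same edits in fast_test_check.py, functional_test_check.py, stress_check.py, style_check.py and unit_tests_check.py later in this patch, converges on one mypy-friendly shape for the CI `process_results()` helpers: an explicit return annotation, a pre-typed empty list via a `# type:` comment, and a narrow `# type: ignore` where `csv.reader` yields rows wider than the declared `Tuple[str, str]`. A minimal standalone sketch of that shape follows; the folder layout and status messages are illustrative, not copied from any single script in the diff.

    #!/usr/bin/env python3
    # Sketch of the process_results() typing pattern used across the CI scripts
    # in this patch (illustrative paths/messages, not an exact copy of any file).
    import csv
    import os
    from typing import List, Tuple


    def process_results(
        result_folder: str,
    ) -> Tuple[str, str, List[Tuple[str, str]], List[str]]:
        test_results = []  # type: List[Tuple[str, str]]
        additional_files = []  # type: List[str]

        # Upload everything found in the result folder as additional logs.
        if os.path.exists(result_folder):
            additional_files = [
                os.path.join(result_folder, f) for f in os.listdir(result_folder)
            ]

        results_path = os.path.join(result_folder, "test_results.tsv")
        if not os.path.exists(results_path):
            return "error", "Not found test_results.tsv", test_results, additional_files

        with open(results_path, "r", encoding="utf-8") as results_file:
            # csv.reader returns List[List[str]]; the scripts keep the narrower
            # declared type and silence mypy locally, as in the hunks above.
            test_results = list(csv.reader(results_file, delimiter="\t"))  # type: ignore
        if len(test_results) == 0:
            return "error", "Empty test_results.tsv", test_results, additional_files

        return "success", "All checks passed", test_results, additional_files

Keeping the `# type:` comments (rather than inline annotations) matches the style the patch uses elsewhere, so the scripts stay readable on older tooling while satisfying the new mypy check added to the style image.
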
@@ -115,7 +118,7 @@ def process_results(result_folder): results_path = os.path.join(result_folder, "test_results.tsv") if os.path.exists(results_path): with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) + test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -153,8 +156,8 @@ if __name__ == "__main__": validate_bugix_check = args.validate_bugfix if "RUN_BY_HASH_NUM" in os.environ: - run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM")) - run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL")) + run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) + run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0")) check_name_with_group = ( check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" ) diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 4116d15bba6..69964c0a0bc 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -7,9 +7,9 @@ import sys import argparse -import boto3 +import boto3 # type: ignore +import requests # type: ignore from github import Github -import requests from env_helper import REPO_COPY, TEMP_PATH, S3_BUILDS_BUCKET, S3_DOWNLOAD from stopwatch import Stopwatch diff --git a/tests/ci/mark_release_ready.py b/tests/ci/mark_release_ready.py index be1771e62bd..57ddb166693 100644 --- a/tests/ci/mark_release_ready.py +++ b/tests/ci/mark_release_ready.py @@ -5,8 +5,7 @@ from env_helper import GITHUB_JOB_URL from get_robot_token import get_best_robot_token from github_helper import GitHub from pr_info import PRInfo - -RELEASE_READY_STATUS = "Ready for release" +from release import RELEASE_READY_STATUS def main(): diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 6a2fac0a291..ddeb070b2b9 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -2,7 +2,7 @@ import json import logging import os -from typing import Set +from typing import Dict, List, Set, Union from unidiff import PatchSet # type: ignore @@ -16,6 +16,7 @@ from env_helper import ( FORCE_TESTS_LABEL = "force tests" SKIP_MERGEABLE_CHECK_LABEL = "skip mergeable check" +NeedsDataType = Dict[str, Dict[str, Union[str, Dict[str, str]]]] DIFF_IN_DOCUMENTATION_EXT = [ ".html", @@ -46,15 +47,22 @@ def get_pr_for_commit(sha, ref): try: response = get_with_retries(try_get_pr_url, sleep=RETRY_SLEEP) data = response.json() + our_prs = [] # type: List[Dict] if len(data) > 1: print("Got more than one pr for commit", sha) for pr in data: + # We need to check if the PR is created in our repo, because + # https://github.com/kaynewu/ClickHouse/pull/2 + # has broke our PR search once in a while + if pr["base"]["repo"]["full_name"] != GITHUB_REPOSITORY: + continue # refs for pushes looks like refs/head/XX # refs for RPs looks like XX if pr["head"]["ref"] in ref: return pr + our_prs.append(pr) print("Cannot find PR with required ref", ref, "returning first one") - first_pr = data[0] + first_pr = our_prs[0] return first_pr except Exception as ex: print("Cannot fetch PR info from commit", ex) @@ -146,7 +154,7 @@ class PRInfo: self.body = github_event["pull_request"]["body"] self.labels = { label["name"] for label in github_event["pull_request"]["labels"] - } + } # type: Set[str] self.user_login = github_event["pull_request"]["user"]["login"] self.user_orgs = set([]) @@ -178,7 +186,7 @@ class PRInfo: if pull_request is None or pull_request["state"] == "closed": # it's merged PR to master 
self.number = 0 - self.labels = {} + self.labels = set() self.pr_html_url = f"{repo_prefix}/commits/{ref}" self.base_ref = ref self.base_name = self.repo_full_name @@ -228,7 +236,7 @@ class PRInfo: print(json.dumps(github_event, sort_keys=True, indent=4)) self.sha = os.getenv("GITHUB_SHA") self.number = 0 - self.labels = {} + self.labels = set() repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" self.task_url = GITHUB_RUN_URL self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" diff --git a/tests/ci/push_to_artifactory.py b/tests/ci/push_to_artifactory.py index dd8081227bf..97971f207ce 100755 --- a/tests/ci/push_to_artifactory.py +++ b/tests/ci/push_to_artifactory.py @@ -5,7 +5,7 @@ import logging import os import re from collections import namedtuple -from typing import Dict, List, Tuple +from typing import Dict, List, Optional, Tuple from artifactory import ArtifactorySaaSPath # type: ignore from build_download_helper import download_build_with_progress @@ -14,7 +14,7 @@ from git_helper import TAG_REGEXP, commit, removeprefix, removesuffix # Necessary ENV variables -def getenv(name: str, default: str = None): +def getenv(name: str, default: Optional[str] = None) -> str: env = os.getenv(name, default) if env is not None: return env @@ -62,7 +62,7 @@ class Packages: raise ValueError(f"{deb_pkg} not in {self.deb}") return removesuffix(deb_pkg, ".deb").split("_")[-1] - def replace_with_fallback(self, name: str): + def replace_with_fallback(self, name: str) -> None: if name.endswith(".deb"): suffix = self.deb.pop(name) self.deb[self.fallback_to_all(name)] = self.fallback_to_all(suffix) @@ -80,7 +80,7 @@ class Packages: return os.path.join(TEMP_PATH, package_file) @staticmethod - def fallback_to_all(url_or_name: str): + def fallback_to_all(url_or_name: str) -> str: """Until July 2022 we had clickhouse-server and clickhouse-client with arch 'all'""" # deb @@ -111,7 +111,7 @@ class S3: self.force_download = force_download self.packages = Packages(version) - def download_package(self, package_file: str, s3_path_suffix: str): + def download_package(self, package_file: str, s3_path_suffix: str) -> None: path = Packages.path(package_file) fallback_path = Packages.fallback_to_all(path) if not self.force_download and ( @@ -186,7 +186,12 @@ class Release: class Artifactory: def __init__( - self, url: str, release: str, deb_repo="deb", rpm_repo="rpm", tgz_repo="tgz" + self, + url: str, + release: str, + deb_repo: str = "deb", + rpm_repo: str = "rpm", + tgz_repo: str = "tgz", ): self._url = url self._release = release @@ -196,7 +201,7 @@ class Artifactory: # check the credentials ENVs for early exit self.__path_helper("_deb", "") - def deploy_deb(self, packages: Packages): + def deploy_deb(self, packages: Packages) -> None: for package_file in packages.deb: path = packages.path(package_file) dist = self._release @@ -212,13 +217,13 @@ class Artifactory: ) self.deb_path(package_file).deploy_deb(path, dist, comp, arch) - def deploy_rpm(self, packages: Packages): + def deploy_rpm(self, packages: Packages) -> None: for package_file in packages.rpm: path = packages.path(package_file) logging.info("Deploy %s to artifactory", path) self.rpm_path(package_file).deploy_file(path) - def deploy_tgz(self, packages: Packages): + def deploy_tgz(self, packages: Packages) -> None: for package_file in packages.tgz: path = packages.path(package_file) logging.info("Deploy %s to artifactory", path) @@ -316,19 +321,19 @@ def parse_args() -> argparse.Namespace: return args -def process_deb(s3: S3, art_clients: 
List[Artifactory]): +def process_deb(s3: S3, art_clients: List[Artifactory]) -> None: s3.download_deb() for art_client in art_clients: art_client.deploy_deb(s3.packages) -def process_rpm(s3: S3, art_clients: List[Artifactory]): +def process_rpm(s3: S3, art_clients: List[Artifactory]) -> None: s3.download_rpm() for art_client in art_clients: art_client.deploy_rpm(s3.packages) -def process_tgz(s3: S3, art_clients: List[Artifactory]): +def process_tgz(s3: S3, art_clients: List[Artifactory]) -> None: s3.download_tgz() for art_client in art_clients: art_client.deploy_tgz(s3.packages) diff --git a/tests/ci/release.py b/tests/ci/release.py index 8024091e300..8e58413f91f 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ script to create releases for ClickHouse @@ -11,14 +11,13 @@ On another hand, PyGithub is used for convenient getting commit's status from AP from contextlib import contextmanager -from typing import List, Optional +from typing import Any, Iterator, List, Literal, Optional import argparse +import json import logging import subprocess from git_helper import commit, release_branch -from github_helper import GitHub -from mark_release_ready import RELEASE_READY_STATUS from version_helper import ( FILE_WITH_VERSION_PATH, GENERATED_CONTRIBUTORS, @@ -31,6 +30,7 @@ from version_helper import ( update_contributors, ) +RELEASE_READY_STATUS = "Ready for release" git = Git() @@ -48,7 +48,7 @@ class Repo: return self._url @url.setter - def url(self, protocol: str): + def url(self, protocol: str) -> None: if protocol == "ssh": self._url = f"git@github.com:{self}.git" elif protocol == "https": @@ -68,17 +68,23 @@ class Release: CMAKE_PATH = get_abs_path(FILE_WITH_VERSION_PATH) CONTRIBUTORS_PATH = get_abs_path(GENERATED_CONTRIBUTORS) - def __init__(self, repo: Repo, release_commit: str, release_type: str): + def __init__( + self, + repo: Repo, + release_commit: str, + release_type: Literal["major", "minor", "patch"], + ): self.repo = repo self._release_commit = "" self.release_commit = release_commit + assert release_type in self.BIG + self.SMALL self.release_type = release_type self._git = git self._version = get_version_from_repo(git=self._git) self._release_branch = "" self._rollback_stack = [] # type: List[str] - def run(self, cmd: str, cwd: Optional[str] = None, **kwargs) -> str: + def run(self, cmd: str, cwd: Optional[str] = None, **kwargs: Any) -> str: cwd_text = "" if cwd: cwd_text = f" (CWD='{cwd}')" @@ -106,31 +112,30 @@ class Release: return VersionType.STABLE def check_commit_release_ready(self): - # First, get the auth token from gh cli - auth_status = self.run( - "gh auth status -t", stderr=subprocess.STDOUT - ).splitlines() - token = "" - for line in auth_status: - if "✓ Token:" in line: - token = line.split()[-1] - if not token: - logging.error("Can not extract token from `gh auth`") - raise subprocess.SubprocessError("Can not extract token from `gh auth`") - gh = GitHub(token, per_page=100) - repo = gh.get_repo(str(self.repo)) + per_page = 100 + page = 1 + while True: + statuses = json.loads( + self.run( + f"gh api 'repos/{self.repo}/commits/{self.release_commit}" + f"/statuses?per_page={per_page}&page={page}'" + ) + ) + + if not statuses: + break + + for status in statuses: + if status["context"] == RELEASE_READY_STATUS: + if not status["state"] == "success": + raise Exception( + f"the status {RELEASE_READY_STATUS} is {status['state']}" + ", not success" + ) - # Statuses are ordered by descending 
updated_at, so the first necessary - # status in the list is the most recent - statuses = repo.get_commit(self.release_commit).get_statuses() - for status in statuses: - if status.context == RELEASE_READY_STATUS: - if status.state == "success": return - raise Exception( - f"the status {RELEASE_READY_STATUS} is {status.state}, not success" - ) + page += 1 raise Exception( f"the status {RELEASE_READY_STATUS} " @@ -153,7 +158,9 @@ class Release: self.check_commit_release_ready() - def do(self, check_dirty: bool, check_branch: bool, with_release_branch: bool): + def do( + self, check_dirty: bool, check_branch: bool, with_release_branch: bool + ) -> None: self.check_prerequisites() if check_dirty: @@ -310,7 +317,7 @@ class Release: return self._version @version.setter - def version(self, version: ClickHouseVersion): + def version(self, version: ClickHouseVersion) -> None: if not isinstance(version, ClickHouseVersion): raise ValueError(f"version must be ClickHouseVersion, not {type(version)}") self._version = version @@ -320,7 +327,7 @@ class Release: return self._release_branch @release_branch.setter - def release_branch(self, branch: str): + def release_branch(self, branch: str) -> None: self._release_branch = release_branch(branch) @property @@ -328,7 +335,7 @@ class Release: return self._release_commit @release_commit.setter - def release_commit(self, release_commit: str): + def release_commit(self, release_commit: str) -> None: self._release_commit = commit(release_commit) @contextmanager @@ -367,7 +374,7 @@ class Release: yield @contextmanager - def _bump_testing_version(self, helper_branch: str): + def _bump_testing_version(self, helper_branch: str) -> Iterator[None]: self.read_version() self.version = self.version.update(self.release_type) self.version.with_description(VersionType.TESTING) @@ -387,7 +394,7 @@ class Release: yield @contextmanager - def _checkout(self, ref: str, with_checkout_back: bool = False): + def _checkout(self, ref: str, with_checkout_back: bool = False) -> Iterator[None]: orig_ref = self._git.branch or self._git.sha need_rollback = False if ref not in (self._git.branch, self._git.sha): @@ -406,7 +413,7 @@ class Release: self.run(rollback_cmd) @contextmanager - def _create_branch(self, name: str, start_point: str = ""): + def _create_branch(self, name: str, start_point: str = "") -> Iterator[None]: self.run(f"git branch {name} {start_point}") rollback_cmd = f"git branch -D {name}" self._rollback_stack.append(rollback_cmd) @@ -418,7 +425,7 @@ class Release: raise @contextmanager - def _create_gh_label(self, label: str, color_hex: str): + def _create_gh_label(self, label: str, color_hex: str) -> Iterator[None]: # API call, https://docs.github.com/en/rest/reference/issues#create-a-label self.run( f"gh api repos/{self.repo}/labels -f name={label} -f color={color_hex}" @@ -433,7 +440,7 @@ class Release: raise @contextmanager - def _create_gh_release(self, as_prerelease: bool): + def _create_gh_release(self, as_prerelease: bool) -> Iterator[None]: with self._create_tag(): # Preserve tag if version is changed tag = self.version.describe @@ -468,7 +475,9 @@ class Release: raise @contextmanager - def _push(self, ref: str, with_rollback_on_fail: bool = True, remote_ref: str = ""): + def _push( + self, ref: str, with_rollback_on_fail: bool = True, remote_ref: str = "" + ) -> Iterator[None]: if remote_ref == "": remote_ref = ref diff --git a/tests/ci/report.py b/tests/ci/report.py index a6700f50dfc..2904a5519a9 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -101,7 
+101,7 @@ def _format_header(header, branch_name, branch_url=None): result = "ClickHouse " + result result += " for " if branch_url: - result += '{name}'.format(url=branch_url, name=branch_name) + result += f'{branch_name}' else: result += branch_name return result @@ -140,9 +140,7 @@ def _get_html_url(url): if isinstance(url, tuple): href, name = url[0], _get_html_url_name(url) if href and name: - return '{name}'.format( - href=href, name=_get_html_url_name(url) - ) + return f'{_get_html_url_name(url)}' return "" @@ -199,13 +197,7 @@ def create_test_html_report( num_fails = num_fails + 1 is_fail_id = 'id="fail' + str(num_fails) + '" ' - row += ( - "'.format(style) - + test_status - + "" - ) + row += f'{test_status}' if test_time is not None: row += "" + test_time + "" @@ -229,8 +221,8 @@ def create_test_html_report( if has_test_logs and not with_raw_logs: headers.append("Logs") - headers = "".join(["" + h + "" for h in headers]) - test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part) + headers_html = "".join(["" + h + "" for h in headers]) + test_part = HTML_TEST_PART.format(headers=headers_html, rows=rows_part) else: test_part = "" @@ -317,33 +309,33 @@ def create_build_html_report( build_results, build_logs_urls, artifact_urls_list ): row = "" - row += "{}".format(build_result.compiler) + row += f"{build_result.compiler}" if build_result.build_type: - row += "{}".format(build_result.build_type) + row += f"{build_result.build_type}" else: - row += "{}".format("relwithdebuginfo") + row += "relwithdebuginfo" if build_result.sanitizer: - row += "{}".format(build_result.sanitizer) + row += f"{build_result.sanitizer}" else: - row += "{}".format("none") + row += "none" - row += "{}".format(build_result.libraries) + row += f"{build_result.libraries}" if build_result.status: style = _get_status_style(build_result.status) - row += '{}'.format(style, build_result.status) + row += f'{build_result.status}' else: style = _get_status_style("error") - row += '{}'.format(style, "error") + row += f'error' - row += 'link'.format(build_log_url) + row += f'link' if build_result.elapsed_seconds: delta = datetime.timedelta(seconds=build_result.elapsed_seconds) else: - delta = "unknown" + delta = "unknown" # type: ignore - row += "{}".format(str(delta)) + row += f"{delta}" links = "" link_separator = "
" @@ -355,7 +347,7 @@ def create_build_html_report( links += link_separator if links: links = links[: -len(link_separator)] - row += "{}".format(links) + row += f"{links}" row += "" rows += row diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 39dbc938c8f..7119f443719 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -112,7 +112,7 @@ def should_run_checks_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]: return True, "No special conditions apply", "pending" -def check_pr_description(pr_info) -> Tuple[str, str]: +def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]: lines = list( map(lambda x: x.strip(), pr_info.body.split("\n") if pr_info.body else []) ) diff --git a/tests/ci/runner_token_rotation_lambda/__init__.py b/tests/ci/runner_token_rotation_lambda/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 24ff013d69a..03e855a0057 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -46,7 +46,7 @@ class S3Helper: self.host = host self.download_host = download_host - def _upload_file_to_s3(self, bucket_name, file_path, s3_path): + def _upload_file_to_s3(self, bucket_name: str, file_path: str, s3_path: str) -> str: logging.debug( "Start uploading %s to bucket=%s path=%s", file_path, bucket_name, s3_path ) @@ -110,7 +110,7 @@ class S3Helper: url = f"{self.download_host}/{bucket_name}/{s3_path}" return url.replace("+", "%2B").replace(" ", "%20") - def upload_test_report_to_s3(self, file_path, s3_path): + def upload_test_report_to_s3(self, file_path: str, s3_path: str) -> str: if CI: return self._upload_file_to_s3(S3_TEST_REPORTS_BUCKET, file_path, s3_path) else: @@ -296,7 +296,7 @@ class S3Helper: return False @staticmethod - def copy_file_to_local(bucket_name, file_path, s3_path): + def copy_file_to_local(bucket_name: str, file_path: str, s3_path: str) -> str: local_path = os.path.abspath( os.path.join(RUNNER_TEMP, "s3", bucket_name, s3_path) ) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 63c7d18fe46..5e94969d4b1 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -4,6 +4,7 @@ import logging import subprocess import os import sys +from typing import List, Tuple from github import Github @@ -137,7 +138,7 @@ if __name__ == "__main__": report_url = GITHUB_RUN_URL status = "success" - test_results = [] + test_results = [] # type: List[Tuple[str, str]] # Try to get status message saved by the SQLancer try: # with open( @@ -145,7 +146,7 @@ if __name__ == "__main__": # ) as status_f: # status = status_f.readline().rstrip("\n") if os.path.exists(os.path.join(workspace_path, "server_crashed.log")): - test_results.append("Server crashed", "FAIL") + test_results.append(("Server crashed", "FAIL")) with open( os.path.join(workspace_path, "summary.tsv"), "r", encoding="utf-8" ) as summary_f: diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 8f310eaa99d..c02128d114f 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -5,6 +5,7 @@ import logging import subprocess import os import sys +from typing import List, Tuple from github import Github @@ -44,8 +45,10 @@ def get_run_command( return cmd -def process_results(result_folder, server_log_path, run_log_path): - test_results = [] +def process_results( + result_folder: str, server_log_path: str, run_log_path: str +) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: + test_results = [] # type: List[Tuple[str, str]] additional_files = [] # 
Just upload all files from result_folder. # If task provides processed results, then it's responsible for content @@ -89,7 +92,7 @@ def process_results(result_folder, server_log_path, run_log_path): results_path = os.path.join(result_folder, "test_results.tsv") with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) + test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore if len(test_results) == 0: raise Exception("Empty results") diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 23a1dd467d7..70bf1cd4d17 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,11 +1,13 @@ #!/usr/bin/env python3 import argparse +import atexit import csv import logging import os import subprocess import sys -import atexit + +from typing import List, Tuple from clickhouse_helper import ( @@ -28,9 +30,18 @@ from upload_result_helper import upload_results NAME = "Style Check" +GIT_PREFIX = ( # All commits to remote are done as robot-clickhouse + "git -c user.email=robot-clickhouse@users.noreply.github.com " + "-c user.name=robot-clickhouse -c commit.gpgsign=false " + "-c core.sshCommand=" + "'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'" +) -def process_result(result_folder): - test_results = [] + +def process_result( + result_folder: str, +) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: + test_results = [] # type: List[Tuple[str, str]] additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible @@ -57,7 +68,7 @@ def process_result(result_folder): try: results_path = os.path.join(result_folder, "test_results.tsv") with open(results_path, "r", encoding="utf-8") as fd: - test_results = list(csv.reader(fd, delimiter="\t")) + test_results = list(csv.reader(fd, delimiter="\t")) # type: ignore if len(test_results) == 0: raise Exception("Empty results") @@ -81,7 +92,7 @@ def parse_args(): return parser.parse_args() -def checkout_head(pr_info: PRInfo): +def checkout_head(pr_info: PRInfo) -> None: # It works ONLY for PRs, and only over ssh, so either # ROBOT_CLICKHOUSE_SSH_KEY should be set or ssh-agent should work assert pr_info.number @@ -89,14 +100,8 @@ def checkout_head(pr_info: PRInfo): # We can't push to forks, sorry folks return remote_url = pr_info.event["pull_request"]["base"]["repo"]["ssh_url"] - git_prefix = ( # All commits to remote are done as robot-clickhouse - "git -c user.email=robot-clickhouse@clickhouse.com " - "-c user.name=robot-clickhouse -c commit.gpgsign=false " - "-c core.sshCommand=" - "'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'" - ) fetch_cmd = ( - f"{git_prefix} fetch --depth=1 " + f"{GIT_PREFIX} fetch --depth=1 " f"{remote_url} {pr_info.head_ref}:head-{pr_info.head_ref}" ) if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""): @@ -107,7 +112,7 @@ def checkout_head(pr_info: PRInfo): git_runner(f"git checkout -f head-{pr_info.head_ref}") -def commit_push_staged(pr_info: PRInfo): +def commit_push_staged(pr_info: PRInfo) -> None: # It works ONLY for PRs, and only over ssh, so either # ROBOT_CLICKHOUSE_SSH_KEY should be set or ssh-agent should work assert pr_info.number @@ -118,15 +123,9 @@ def commit_push_staged(pr_info: PRInfo): if not git_staged: return remote_url = pr_info.event["pull_request"]["base"]["repo"]["ssh_url"] - git_prefix = ( # All commits to remote are done as robot-clickhouse - "git -c user.email=robot-clickhouse@clickhouse.com " - "-c 
user.name=robot-clickhouse -c commit.gpgsign=false " - "-c core.sshCommand=" - "'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no'" - ) - git_runner(f"{git_prefix} commit -m 'Automatic style fix'") + git_runner(f"{GIT_PREFIX} commit -m 'Automatic style fix'") push_cmd = ( - f"{git_prefix} push {remote_url} head-{pr_info.head_ref}:{pr_info.head_ref}" + f"{GIT_PREFIX} push {remote_url} head-{pr_info.head_ref}:{pr_info.head_ref}" ) if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""): with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"): diff --git a/tests/ci/team_keys_lambda/__init__.py b/tests/ci/team_keys_lambda/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/ci/team_keys_lambda/app.py b/tests/ci/team_keys_lambda/app.py index 9e73a3f0993..870d41c441e 100644 --- a/tests/ci/team_keys_lambda/app.py +++ b/tests/ci/team_keys_lambda/app.py @@ -14,7 +14,7 @@ import boto3 # type: ignore class Keys(set): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.updated_at = 0 + self.updated_at = 0.0 def update_now(self): self.updated_at = datetime.now().timestamp() @@ -88,7 +88,7 @@ def get_token_from_aws() -> str: ) get_secret_value_response = client.get_secret_value(SecretId=secret_name) data = json.loads(get_secret_value_response["SecretString"]) - return data["clickhouse_robot_token"] + return data["clickhouse_robot_token"] # type: ignore def main(token: str, org: str, team_slug: str) -> str: diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index 7270cd6fb03..61404847bff 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -3,6 +3,7 @@ from subprocess import Popen, PIPE, STDOUT from threading import Thread from time import sleep +from typing import Optional import logging import os import sys @@ -18,7 +19,7 @@ class TeePopen: self.command = command self.log_file = log_file self.env = env - self.process = None + self._process = None # type: Optional[Popen] self.timeout = timeout def _check_timeout(self): @@ -51,7 +52,7 @@ class TeePopen: return self def __exit__(self, t, value, traceback): - for line in self.process.stdout: + for line in self.process.stdout: # type: ignore sys.stdout.write(line) self.log_file.write(line) @@ -59,8 +60,18 @@ class TeePopen: self.log_file.close() def wait(self): - for line in self.process.stdout: + for line in self.process.stdout: # type: ignore sys.stdout.write(line) self.log_file.write(line) return self.process.wait() + + @property + def process(self) -> Popen: + if self._process is not None: + return self._process + raise AttributeError("process is not created yet") + + @process.setter + def process(self, process: Popen) -> None: + self._process = process diff --git a/tests/ci/terminate_runner_lambda/__init__.py b/tests/ci/terminate_runner_lambda/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index 4140690e891..223555ced74 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -1,17 +1,18 @@ #!/usr/bin/env python3 -import requests import argparse -import jwt import sys import json import time from collections import namedtuple +from typing import Any, Dict, List, Tuple + +import boto3 # type: ignore +import requests # type: ignore +import jwt -def get_key_and_app_from_aws(): - import boto3 - +def get_key_and_app_from_aws() -> Tuple[str, int]: secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = 
session.client( @@ -22,7 +23,7 @@ def get_key_and_app_from_aws(): return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) -def get_installation_id(jwt_token): +def get_installation_id(jwt_token: str) -> int: headers = { "Authorization": f"Bearer {jwt_token}", "Accept": "application/vnd.github.v3+json", @@ -33,10 +34,12 @@ def get_installation_id(jwt_token): for installation in data: if installation["account"]["login"] == "ClickHouse": installation_id = installation["id"] - return installation_id + break + + return installation_id # type: ignore -def get_access_token(jwt_token, installation_id): +def get_access_token(jwt_token: str, installation_id: int) -> str: headers = { "Authorization": f"Bearer {jwt_token}", "Accept": "application/vnd.github.v3+json", @@ -47,15 +50,16 @@ def get_access_token(jwt_token, installation_id): ) response.raise_for_status() data = response.json() - return data["token"] + return data["token"] # type: ignore RunnerDescription = namedtuple( "RunnerDescription", ["id", "name", "tags", "offline", "busy"] ) +RunnerDescriptions = List[RunnerDescription] -def list_runners(access_token): +def list_runners(access_token: str) -> RunnerDescriptions: headers = { "Authorization": f"token {access_token}", "Accept": "application/vnd.github.v3+json", @@ -94,9 +98,9 @@ def list_runners(access_token): return result -def how_many_instances_to_kill(event_data): +def how_many_instances_to_kill(event_data: dict) -> Dict[str, int]: data_array = event_data["CapacityToTerminate"] - to_kill_by_zone = {} + to_kill_by_zone = {} # type: Dict[str, int] for av_zone in data_array: zone_name = av_zone["AvailabilityZone"] to_kill = av_zone["Capacity"] @@ -104,15 +108,16 @@ def how_many_instances_to_kill(event_data): to_kill_by_zone[zone_name] = 0 to_kill_by_zone[zone_name] += to_kill + return to_kill_by_zone -def get_candidates_to_be_killed(event_data): +def get_candidates_to_be_killed(event_data: dict) -> Dict[str, List[str]]: data_array = event_data["Instances"] - instances_by_zone = {} + instances_by_zone = {} # type: Dict[str, List[str]] for instance in data_array: zone_name = instance["AvailabilityZone"] - instance_id = instance["InstanceId"] + instance_id = instance["InstanceId"] # type: str if zone_name not in instances_by_zone: instances_by_zone[zone_name] = [] instances_by_zone[zone_name].append(instance_id) @@ -120,7 +125,7 @@ def get_candidates_to_be_killed(event_data): return instances_by_zone -def delete_runner(access_token, runner): +def delete_runner(access_token: str, runner: RunnerDescription) -> bool: headers = { "Authorization": f"token {access_token}", "Accept": "application/vnd.github.v3+json", @@ -134,10 +139,12 @@ def delete_runner(access_token, runner): print( f"Response code deleting {runner.name} with id {runner.id} is {response.status_code}" ) - return response.status_code == 204 + return bool(response.status_code == 204) -def main(github_secret_key, github_app_id, event): +def main( + github_secret_key: str, github_app_id: int, event: dict +) -> Dict[str, List[str]]: print("Got event", json.dumps(event, sort_keys=True, indent=4)) to_kill_by_zone = how_many_instances_to_kill(event) instances_by_zone = get_candidates_to_be_killed(event) @@ -156,17 +163,16 @@ def main(github_secret_key, github_app_id, event): to_delete_runners = [] instances_to_kill = [] - for zone in to_kill_by_zone: - num_to_kill = to_kill_by_zone[zone] + for zone, num_to_kill in to_kill_by_zone.items(): candidates = instances_by_zone[zone] if num_to_kill > len(candidates): raise Exception( 
f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}" ) - delete_for_av = [] + delete_for_av = [] # type: RunnerDescriptions for candidate in candidates: - if candidate not in set([runner.name for runner in runners]): + if candidate not in set(runner.name for runner in runners): print( f"Candidate {candidate} was not in runners list, simply delete it" ) @@ -214,7 +220,7 @@ def main(github_secret_key, github_app_id, event): return response -def handler(event, context): +def handler(event: dict, context: Any) -> Dict[str, List[str]]: private_key, app_id = get_key_and_app_from_aws() return main(private_key, app_id, event) diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index c2dfab9dddc..4777296da18 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -5,6 +5,7 @@ import os import sys import subprocess import atexit +from typing import List, Tuple from github import Github @@ -37,14 +38,16 @@ def get_test_name(line): raise Exception(f"No test name in line '{line}'") -def process_result(result_folder): +def process_results( + result_folder: str, +) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: OK_SIGN = "OK ]" FAILED_SIGN = "FAILED ]" SEGFAULT = "Segmentation fault" SIGNAL = "received signal SIG" PASSED = "PASSED" - summary = [] + summary = [] # type: List[Tuple[str, str]] total_counter = 0 failed_counter = 0 result_log_path = f"{result_folder}/test_result.txt" @@ -151,7 +154,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) s3_helper = S3Helper() - state, description, test_results, additional_logs = process_result(test_output) + state, description, test_results, additional_logs = process_results(test_output) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index e145df02f80..9fcd3733acb 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -16,7 +16,7 @@ def process_logs( ): logging.info("Upload files to s3 %s", additional_logs) - processed_logs = {} + processed_logs = {} # type: ignore # Firstly convert paths of logs from test_results to urls to s3. 
for test_result in test_results: if len(test_result) <= 3 or with_raw_logs: diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 162bab6a50a..69cfba64be3 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -2,9 +2,9 @@ import logging import os.path as p from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, ArgumentTypeError -from typing import Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Union -from git_helper import TWEAK, Git, get_tags, git_runner, removeprefix +from git_helper import TWEAK, Git as Git, get_tags, git_runner, removeprefix FILE_WITH_VERSION_PATH = "cmake/autogenerated_versions.txt" CHANGELOG_IN_PATH = "debian/changelog.in" @@ -45,7 +45,7 @@ class ClickHouseVersion: patch: Union[int, str], revision: Union[int, str], git: Optional[Git], - tweak: str = None, + tweak: Optional[str] = None, ): self._major = int(major) self._minor = int(minor) @@ -59,10 +59,15 @@ class ClickHouseVersion: self._tweak = self._git.tweak self._describe = "" - def update(self, part: str) -> "ClickHouseVersion": + def update(self, part: Literal["major", "minor", "patch"]) -> "ClickHouseVersion": """If part is valid, returns a new version""" - method = getattr(self, f"{part}_update") - return method() + if part == "major": + return self.major_update() + if part == "minor": + return self.minor_update() + if part == "patch": + return self.patch_update() + raise KeyError(f"wrong part {part} is used") def major_update(self) -> "ClickHouseVersion": if self._git is not None: @@ -139,10 +144,10 @@ class ClickHouseVersion: raise ValueError(f"version type {version_type} not in {VersionType.VALID}") self._describe = f"v{self.string}-{version_type}" - def __eq__(self, other) -> bool: + def __eq__(self, other: Any) -> bool: if not isinstance(self, type(other)): return NotImplemented - return ( + return bool( self.major == other.major and self.minor == other.minor and self.patch == other.patch @@ -170,7 +175,7 @@ class VersionType: VALID = (TESTING, PRESTABLE, STABLE, LTS) -def validate_version(version: str): +def validate_version(version: str) -> None: parts = version.split(".") if len(parts) != 4: raise ValueError(f"{version} does not contain 4 parts") @@ -259,7 +264,7 @@ def get_tagged_versions() -> List[ClickHouseVersion]: def update_cmake_version( version: ClickHouseVersion, versions_path: str = FILE_WITH_VERSION_PATH, -): +) -> None: path_to_file = get_abs_path(versions_path) with open(path_to_file, "w", encoding="utf-8") as f: f.write(VERSIONS_TEMPLATE.format_map(version.as_dict())) @@ -269,7 +274,7 @@ def update_contributors( relative_contributors_path: str = GENERATED_CONTRIBUTORS, force: bool = False, raise_error: bool = False, -): +) -> None: # Check if we have shallow checkout by comparing number of lines # '--is-shallow-repository' is in git since 2.15, 2017-10-30 if git_runner.run("git rev-parse --is-shallow-repository") == "true" and not force: diff --git a/tests/ci/version_test.py b/tests/ci/version_test.py index 86a2d58c3c8..abd0f9349f4 100644 --- a/tests/ci/version_test.py +++ b/tests/ci/version_test.py @@ -17,9 +17,9 @@ class TestFunctions(unittest.TestCase): ("v1.1.1.2-testing", vh.get_version_from_string("1.1.1.2")), ("refs/tags/v1.1.1.2-testing", vh.get_version_from_string("1.1.1.2")), ) - for case in cases: - version = vh.version_arg(case[0]) - self.assertEqual(case[1], version) + for test_case in cases: + version = vh.version_arg(test_case[0]) + self.assertEqual(test_case[1], 
version) error_cases = ( "0.0.0", "1.1.1.a", @@ -28,6 +28,6 @@ class TestFunctions(unittest.TestCase): "v1.1.1.2-testin", "refs/tags/v1.1.1.2-testin", ) - for case in error_cases: + for error_case in error_cases: with self.assertRaises(ArgumentTypeError): - version = vh.version_arg(case[0]) + version = vh.version_arg(error_case[0]) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 23e808b0861..d285e29943d 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -313,7 +313,7 @@ def check_suspicious_changed_files(changed_files): return False -def approve_run(workflow_description: WorkflowDescription, token): +def approve_run(workflow_description: WorkflowDescription, token: str) -> None: url = f"{workflow_description.api_url}/approve" _exec_post_with_retry(url, token) @@ -391,7 +391,7 @@ def rerun_workflow(workflow_description, token): def check_workflow_completed( - event_data, workflow_description: WorkflowDescription, token: str + event_data: dict, workflow_description: WorkflowDescription, token: str ) -> bool: if workflow_description.action == "completed": attempt = 0 diff --git a/tests/integration/test_alter_settings_on_cluster/test.py b/tests/integration/test_alter_settings_on_cluster/test.py index 6ab3d446b59..32f7f2efa30 100644 --- a/tests/integration/test_alter_settings_on_cluster/test.py +++ b/tests/integration/test_alter_settings_on_cluster/test.py @@ -52,3 +52,24 @@ def test_default_database_on_cluster(started_cluster): database="test_default_database", sql="SHOW CREATE test_local_table FORMAT TSV", ).endswith("old_parts_lifetime = 100\n") + + ch1.query_and_get_error( + database="test_default_database", + sql="ALTER TABLE test_local_table MODIFY SETTING temporary_directories_lifetime = 1 RESET SETTING old_parts_lifetime;", + ) + + ch1.query_and_get_error( + database="test_default_database", + sql="ALTER TABLE test_local_table RESET SETTING old_parts_lifetime MODIFY SETTING temporary_directories_lifetime = 1;", + ) + + ch1.query( + database="test_default_database", + sql="ALTER TABLE test_local_table ON CLUSTER 'cluster' RESET SETTING old_parts_lifetime;", + ) + + for node in [ch1, ch2]: + assert not node.query( + database="test_default_database", + sql="SHOW CREATE test_local_table FORMAT TSV", + ).endswith("old_parts_lifetime = 100\n") diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index c94dc6d4a87..7eeabde1380 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1,6 +1,7 @@ import pytest import asyncio import re +import random import os.path from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry, TSV @@ -1158,3 +1159,71 @@ def test_mutation(): instance.query("DROP TABLE test.table") instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + + +def test_tables_dependency(): + instance.query("CREATE DATABASE test") + instance.query("CREATE DATABASE test2") + + # For this test we use random names of tables to check they're created according to their dependency (not just in alphabetic order). + random_table_names = [f"{chr(ord('A')+i)}" for i in range(0, 10)] + random.shuffle(random_table_names) + random_table_names = [ + random.choice(["test", "test2"]) + "." 
+ table_name + for table_name in random_table_names + ] + print(f"random_table_names={random_table_names}") + + t1 = random_table_names[0] + t2 = random_table_names[1] + t3 = random_table_names[2] + t4 = random_table_names[3] + t5 = random_table_names[4] + t6 = random_table_names[5] + + # Create a materialized view and a dictionary with a local table as source. + instance.query( + f"CREATE TABLE {t1} (x Int64, y String) ENGINE=MergeTree ORDER BY tuple()" + ) + + instance.query( + f"CREATE TABLE {t2} (x Int64, y String) ENGINE=MergeTree ORDER BY tuple()" + ) + + instance.query(f"CREATE MATERIALIZED VIEW {t3} TO {t2} AS SELECT x, y FROM {t1}") + + instance.query( + f"CREATE DICTIONARY {t4} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(0)" + ) + + instance.query(f"CREATE TABLE {t5} AS dictionary({t4})") + + instance.query( + f"CREATE TABLE {t6}(x Int64, y String DEFAULT dictGet({t4}, 'y', x)) ENGINE=MergeTree ORDER BY tuple()" + ) + + # Make backup. + backup_name = new_backup_name() + instance.query(f"BACKUP DATABASE test, DATABASE test2 TO {backup_name}") + + # Drop everything in reversive order. + def drop(): + instance.query(f"DROP TABLE {t6} NO DELAY") + instance.query(f"DROP TABLE {t5} NO DELAY") + instance.query(f"DROP DICTIONARY {t4}") + instance.query(f"DROP TABLE {t3} NO DELAY") + instance.query(f"DROP TABLE {t2} NO DELAY") + instance.query(f"DROP TABLE {t1} NO DELAY") + instance.query("DROP DATABASE test NO DELAY") + instance.query("DROP DATABASE test2 NO DELAY") + + drop() + + # Restore everything and check. + instance.query(f"RESTORE ALL FROM {backup_name}") + + assert instance.query( + "SELECT concat(database, '.', name) AS c FROM system.tables WHERE database IN ['test', 'test2'] ORDER BY c" + ) == TSV(sorted([t1, t2, t3, t4, t5, t6])) + + drop() diff --git a/tests/integration/test_named_collections/__init__.py b/tests/integration/test_named_collections/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_named_collections/configs/config.d/named_collections.xml b/tests/integration/test_named_collections/configs/config.d/named_collections.xml new file mode 100644 index 00000000000..d24fb303b37 --- /dev/null +++ b/tests/integration/test_named_collections/configs/config.d/named_collections.xml @@ -0,0 +1,7 @@ + + + + value1 + + + diff --git a/tests/integration/test_named_collections/configs/users.d/users.xml b/tests/integration/test_named_collections/configs/users.d/users.xml new file mode 100644 index 00000000000..ee38baa3df9 --- /dev/null +++ b/tests/integration/test_named_collections/configs/users.d/users.xml @@ -0,0 +1,13 @@ + + + + + + ::/0 + + default + default + 1 + + + diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py new file mode 100644 index 00000000000..ce5c8aaa62e --- /dev/null +++ b/tests/integration/test_named_collections/test.py @@ -0,0 +1,200 @@ +import logging +import pytest +import os +import time +from helpers.cluster import ClickHouseCluster + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +NAMED_COLLECTIONS_CONFIG = os.path.join( + SCRIPT_DIR, "./configs/config.d/named_collections.xml" +) + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=[ + "configs/config.d/named_collections.xml", + ], + user_configs=[ + 
"configs/users.d/users.xml", + ], + stay_alive=True, + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def replace_config(node, old, new): + node.replace_in_config( + "/etc/clickhouse-server/config.d/named_collections.xml", + old, + new, + ) + + +def test_config_reload(cluster): + node = cluster.instances["node"] + assert ( + "collection1" == node.query("select name from system.named_collections").strip() + ) + assert ( + "['key1']" + == node.query( + "select mapKeys(collection) from system.named_collections where name = 'collection1'" + ).strip() + ) + assert ( + "value1" + == node.query( + "select collection['key1'] from system.named_collections where name = 'collection1'" + ).strip() + ) + + replace_config(node, "value1", "value2") + node.query("SYSTEM RELOAD CONFIG") + + assert ( + "['key1']" + == node.query( + "select mapKeys(collection) from system.named_collections where name = 'collection1'" + ).strip() + ) + assert ( + "value2" + == node.query( + "select collection['key1'] from system.named_collections where name = 'collection1'" + ).strip() + ) + + +def test_sql_commands(cluster): + node = cluster.instances["node"] + assert "1" == node.query("select count() from system.named_collections").strip() + + node.query("CREATE NAMED COLLECTION collection2 AS key1=1, key2='value2'") + + def check_created(): + assert ( + "collection1\ncollection2" + == node.query("select name from system.named_collections").strip() + ) + + assert ( + "['key1','key2']" + == node.query( + "select mapKeys(collection) from system.named_collections where name = 'collection2'" + ).strip() + ) + + assert ( + "1" + == node.query( + "select collection['key1'] from system.named_collections where name = 'collection2'" + ).strip() + ) + + assert ( + "value2" + == node.query( + "select collection['key2'] from system.named_collections where name = 'collection2'" + ).strip() + ) + + check_created() + node.restart_clickhouse() + check_created() + + node.query("ALTER NAMED COLLECTION collection2 SET key1=4, key3='value3'") + + def check_altered(): + assert ( + "['key1','key2','key3']" + == node.query( + "select mapKeys(collection) from system.named_collections where name = 'collection2'" + ).strip() + ) + + assert ( + "4" + == node.query( + "select collection['key1'] from system.named_collections where name = 'collection2'" + ).strip() + ) + + assert ( + "value3" + == node.query( + "select collection['key3'] from system.named_collections where name = 'collection2'" + ).strip() + ) + + check_altered() + node.restart_clickhouse() + check_altered() + + node.query("ALTER NAMED COLLECTION collection2 DELETE key2") + + def check_deleted(): + assert ( + "['key1','key3']" + == node.query( + "select mapKeys(collection) from system.named_collections where name = 'collection2'" + ).strip() + ) + + check_deleted() + node.restart_clickhouse() + check_deleted() + + node.query( + "ALTER NAMED COLLECTION collection2 SET key3=3, key4='value4' DELETE key1" + ) + + def check_altered_and_deleted(): + assert ( + "['key3','key4']" + == node.query( + "select mapKeys(collection) from system.named_collections where name = 'collection2'" + ).strip() + ) + + assert ( + "3" + == node.query( + "select collection['key3'] from system.named_collections where name = 'collection2'" + ).strip() + ) + + assert ( + "value4" + == node.query( + "select collection['key4'] from system.named_collections where name = 'collection2'" + ).strip() + ) + + 
check_altered_and_deleted() + node.restart_clickhouse() + check_altered_and_deleted() + + node.query("DROP NAMED COLLECTION collection2") + + def check_dropped(): + assert "1" == node.query("select count() from system.named_collections").strip() + assert ( + "collection1" + == node.query("select name from system.named_collections").strip() + ) + + check_dropped() + node.restart_clickhouse() + check_dropped() diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index de5433d5beb..1e6a39ee1bd 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -592,60 +592,64 @@ def test_alters_from_different_replicas(started_cluster): def create_some_tables(db): settings = {"distributed_ddl_task_timeout": 0} - main_node.query( - "CREATE TABLE {}.t1 (n int) ENGINE=Memory".format(db), settings=settings - ) + main_node.query(f"CREATE TABLE {db}.t1 (n int) ENGINE=Memory", settings=settings) dummy_node.query( - "CREATE TABLE {}.t2 (s String) ENGINE=Memory".format(db), settings=settings + f"CREATE TABLE {db}.t2 (s String) ENGINE=Memory", settings=settings ) main_node.query( - "CREATE TABLE {}.mt1 (n int) ENGINE=MergeTree order by n".format(db), + f"CREATE TABLE {db}.mt1 (n int) ENGINE=MergeTree order by n", settings=settings, ) dummy_node.query( - "CREATE TABLE {}.mt2 (n int) ENGINE=MergeTree order by n".format(db), + f"CREATE TABLE {db}.mt2 (n int) ENGINE=MergeTree order by n", settings=settings, ) main_node.query( - "CREATE TABLE {}.rmt1 (n int) ENGINE=ReplicatedMergeTree order by n".format(db), + f"CREATE TABLE {db}.rmt1 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings, ) dummy_node.query( - "CREATE TABLE {}.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n".format(db), + f"CREATE TABLE {db}.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings, ) main_node.query( - "CREATE TABLE {}.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n".format(db), + f"CREATE TABLE {db}.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings, ) dummy_node.query( - "CREATE TABLE {}.rmt5 (n int) ENGINE=ReplicatedMergeTree order by n".format(db), + f"CREATE TABLE {db}.rmt5 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings, ) main_node.query( - "CREATE MATERIALIZED VIEW {}.mv1 (n int) ENGINE=ReplicatedMergeTree order by n AS SELECT n FROM recover.rmt1".format( - db - ), + f"CREATE MATERIALIZED VIEW {db}.mv1 (n int) ENGINE=ReplicatedMergeTree order by n AS SELECT n FROM recover.rmt1", settings=settings, ) dummy_node.query( - "CREATE MATERIALIZED VIEW {}.mv2 (n int) ENGINE=ReplicatedMergeTree order by n AS SELECT n FROM recover.rmt2".format( - db - ), + f"CREATE MATERIALIZED VIEW {db}.mv2 (n int) ENGINE=ReplicatedMergeTree order by n AS SELECT n FROM recover.rmt2", settings=settings, ) main_node.query( - "CREATE DICTIONARY {}.d1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n " + f"CREATE DICTIONARY {db}.d1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n " "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt1' PASSWORD '' DB 'recover')) " - "LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())".format(db) + "LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())" ) dummy_node.query( - "CREATE DICTIONARY {}.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n " + f"CREATE DICTIONARY {db}.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n " "SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt2' PASSWORD '' DB 'recover')) " - 
"LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())".format(db) + "LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())" ) +# These tables are used to check that DatabaseReplicated correctly renames all the tables in case when it restores from the lost state +def create_table_for_exchanges(db): + settings = {"distributed_ddl_task_timeout": 0} + for table in ["a1", "a2", "a3", "a4", "a5", "a6"]: + main_node.query( + f"CREATE TABLE {db}.{table} (s String) ENGINE=ReplicatedMergeTree order by s", + settings=settings, + ) + + def test_recover_staled_replica(started_cluster): main_node.query( "CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica1');" @@ -659,13 +663,20 @@ def test_recover_staled_replica(started_cluster): settings = {"distributed_ddl_task_timeout": 0} create_some_tables("recover") + create_table_for_exchanges("recover") for table in ["t1", "t2", "mt1", "mt2", "rmt1", "rmt2", "rmt3", "rmt5"]: - main_node.query("INSERT INTO recover.{} VALUES (42)".format(table)) + main_node.query(f"INSERT INTO recover.{table} VALUES (42)") for table in ["t1", "t2", "mt1", "mt2"]: - dummy_node.query("INSERT INTO recover.{} VALUES (42)".format(table)) + dummy_node.query(f"INSERT INTO recover.{table} VALUES (42)") + + for i, table in enumerate(["a1", "a2", "a3", "a4", "a5", "a6"]): + main_node.query(f"INSERT INTO recover.{table} VALUES ('{str(i + 1) * 10}')") + for table in ["rmt1", "rmt2", "rmt3", "rmt5"]: - main_node.query("SYSTEM SYNC REPLICA recover.{}".format(table)) + main_node.query(f"SYSTEM SYNC REPLICA recover.{table}") + for table in ["a1", "a2", "a3", "a4", "a5", "a6"]: + main_node.query(f"SYSTEM SYNC REPLICA recover.{table}") with PartitionManager() as pm: pm.drop_instance_zk_connections(dummy_node) @@ -699,19 +710,15 @@ def test_recover_staled_replica(started_cluster): ).strip() ) main_node.query_with_retry( - "ALTER TABLE recover.`{}` MODIFY COLUMN n int DEFAULT 42".format( - inner_table - ), + f"ALTER TABLE recover.`{inner_table}` MODIFY COLUMN n int DEFAULT 42", settings=settings, ) main_node.query_with_retry( - "ALTER TABLE recover.mv1 MODIFY QUERY SELECT m FROM recover.rmt1".format( - inner_table - ), + "ALTER TABLE recover.mv1 MODIFY QUERY SELECT m FROM recover.rmt1", settings=settings, ) main_node.query_with_retry( - "RENAME TABLE recover.mv2 TO recover.mv3".format(inner_table), + "RENAME TABLE recover.mv2 TO recover.mv3", settings=settings, ) @@ -727,11 +734,18 @@ def test_recover_staled_replica(started_cluster): "CREATE TABLE recover.tmp AS recover.m1", settings=settings ) + main_node.query("EXCHANGE TABLES recover.a1 AND recover.a2", settings=settings) + main_node.query("EXCHANGE TABLES recover.a3 AND recover.a4", settings=settings) + main_node.query("EXCHANGE TABLES recover.a5 AND recover.a4", settings=settings) + main_node.query("EXCHANGE TABLES recover.a6 AND recover.a3", settings=settings) + main_node.query("RENAME TABLE recover.a6 TO recover.a7", settings=settings) + main_node.query("RENAME TABLE recover.a1 TO recover.a8", settings=settings) + assert ( main_node.query( "SELECT name FROM system.tables WHERE database='recover' AND name NOT LIKE '.inner_id.%' ORDER BY name" ) - == "d1\nd2\nm1\nmt1\nmt2\nmv1\nmv3\nrmt1\nrmt2\nrmt4\nt2\ntmp\n" + == "a2\na3\na4\na5\na7\na8\nd1\nd2\nm1\nmt1\nmt2\nmv1\nmv3\nrmt1\nrmt2\nrmt4\nt2\ntmp\n" ) query = ( "SELECT name, uuid, create_table_query FROM system.tables WHERE database='recover' AND name NOT LIKE '.inner_id.%' " @@ -752,6 +766,12 @@ def test_recover_staled_replica(started_cluster): == "2\n" ) + # Check that Database 
Replicated renamed all the tables correctly + for i, table in enumerate(["a2", "a8", "a5", "a7", "a4", "a3"]): + assert ( + dummy_node.query(f"SELECT * FROM recover.{table}") == f"{str(i + 1) * 10}\n" + ) + for table in [ "m1", "t2", @@ -765,11 +785,11 @@ def test_recover_staled_replica(started_cluster): "mv1", "mv3", ]: - assert main_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n" + assert main_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n" for table in ["t2", "rmt1", "rmt2", "rmt4", "d1", "d2", "mt2", "mv1", "mv3"]: - assert dummy_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n" + assert dummy_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n" for table in ["m1", "mt1"]: - assert dummy_node.query("SELECT count() FROM recover.{}".format(table)) == "0\n" + assert dummy_node.query(f"SELECT count() FROM recover.{table}") == "0\n" global test_recover_staled_replica_run assert ( dummy_node.query( @@ -784,20 +804,22 @@ def test_recover_staled_replica(started_cluster): == f"{test_recover_staled_replica_run}\n" ) test_recover_staled_replica_run += 1 + + print(dummy_node.query("SHOW DATABASES")) + print(dummy_node.query("SHOW TABLES FROM recover_broken_tables")) + print(dummy_node.query("SHOW TABLES FROM recover_broken_replicated_tables")) + table = dummy_node.query( - "SHOW TABLES FROM recover_broken_tables LIKE 'mt1_29_%' LIMIT 1" + "SHOW TABLES FROM recover_broken_tables LIKE 'mt1_41_%' LIMIT 1" ).strip() assert ( - dummy_node.query("SELECT (*,).1 FROM recover_broken_tables.{}".format(table)) - == "42\n" + dummy_node.query(f"SELECT (*,).1 FROM recover_broken_tables.{table}") == "42\n" ) table = dummy_node.query( - "SHOW TABLES FROM recover_broken_replicated_tables LIKE 'rmt5_29_%' LIMIT 1" + "SHOW TABLES FROM recover_broken_replicated_tables LIKE 'rmt5_41_%' LIMIT 1" ).strip() assert ( - dummy_node.query( - "SELECT (*,).1 FROM recover_broken_replicated_tables.{}".format(table) - ) + dummy_node.query(f"SELECT (*,).1 FROM recover_broken_replicated_tables.{table}") == "42\n" ) diff --git a/tests/performance/query_interpretation_join.xml b/tests/performance/query_interpretation_join.xml new file mode 100644 index 00000000000..5bbb0baf842 --- /dev/null +++ b/tests/performance/query_interpretation_join.xml @@ -0,0 +1,393 @@ + + + CREATE TABLE IF NOT EXISTS interpret_table_01 + ( + `idColumnU64` UInt64, + `dateColumn` DateTime, + `aggCount` AggregateFunction(count), + `aggArgMaxFloat32_1` AggregateFunction(argMax, Float32, DateTime), + `aggArgMaxString` AggregateFunction(argMax, String, DateTime), + `aggArgMaxFloat32_2` AggregateFunction(argMax, Float32, DateTime), + `nDateTime_02_date` SimpleAggregateFunction(max, DateTime), + `nDateTime_02_date_292929292` SimpleAggregateFunction(max, DateTime), + `agg_topk_uint32` AggregateFunction(topKWeighted(2), UInt32, UInt32), + `agg_argmax_string_datetime_01` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_u8_01` AggregateFunction(argMax, UInt8, DateTime), + `agg_argmax_string_datetime_02` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_datetime_03` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_datetime_04` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_datetime_05` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_datetime_06` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_datetime_07` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_u8_02` AggregateFunction(argMax, UInt8, 
DateTime), + `agg_argmax_string_u8_03` AggregateFunction(argMax, UInt8, DateTime), + `agg_argmax_string_u8_04` AggregateFunction(argMax, UInt8, DateTime), + `agg_count_01` AggregateFunction(count), + `agg_count_02` AggregateFunction(count), + `agg_count_03` AggregateFunction(count), + `agg_count_04` AggregateFunction(count), + `agg_count_05` AggregateFunction(count), + `agg_count_06` AggregateFunction(count), + `agg_count_07` AggregateFunction(count), + `agg_count_08` AggregateFunction(count), + `agg_count_09` AggregateFunction(count), + `agg_count_10` AggregateFunction(count), + `agg_count_11` AggregateFunction(count), + `agg_count_12` AggregateFunction(count), + `agg_count_13` AggregateFunction(count), + `agg_count_14` AggregateFunction(count), + `agg_count_15` AggregateFunction(count), + `agg_count_16` AggregateFunction(count), + `agg_argmax_string_datetime_08` AggregateFunction(argMax, String, DateTime), + `agg_argmax_f32_datetime_01` AggregateFunction(argMax, Float32, DateTime), + `agg_argmax_string_datetime_09` AggregateFunction(argMax, String, DateTime), + `agg_argmax_f32_datetime_02` AggregateFunction(argMax, Float32, DateTime), + `agg_argmax_date_datetime_01` AggregateFunction(argMax, Date, DateTime), + `agg_argmax_date_datetime_02` AggregateFunction(argMax, Date, DateTime), + `agg_argmax_u8_other_01` AggregateFunction(argMax, UInt8, DateTime), + `agg_argmax_u8_other_02` AggregateFunction(argMax, UInt8, DateTime), + `agg_argmax_u8_other_03` AggregateFunction(argMax, UInt8, DateTime), + `agg_argmax_u8_other_04` AggregateFunction(argMax, UInt8, DateTime), + `agg_argmax_string_datetime_10` AggregateFunction(argMax, String, DateTime), + `agg_argmax_u8_other_05` AggregateFunction(argMax, UInt8, DateTime), + `agg_argmax_u8_other_06` AggregateFunction(argMax, UInt8, DateTime), + `agg_argmax_u8_other_07` AggregateFunction(argMax, UInt8, DateTime), + `agg_argmax_string_datetime_11` AggregateFunction(argMax, String, DateTime), + `other_max_datetime_01` SimpleAggregateFunction(max, DateTime), + `other_max_datetime_02` SimpleAggregateFunction(max, DateTime), + `nDateTime_03_date` SimpleAggregateFunction(max, DateTime), + `nDateTime_03_shown_date` SimpleAggregateFunction(max, DateTime), + `nDateTime_04_date` SimpleAggregateFunction(max, DateTime), + `nDateTime_04_shown_date` SimpleAggregateFunction(max, DateTime), + `aggCount_3` AggregateFunction(count), + `uniq_date_agg` AggregateFunction(uniq, Date), + `aggCount_4` AggregateFunction(count), + `agg_argmax_u128_datetime_01` AggregateFunction(argMax, UInt128, DateTime), + `topk_u128_01` AggregateFunction(topKWeighted(5), UInt128, UInt32), + `agg_argmax_string_datetime_12` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_datetime_13` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_datetime_14` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_datetime_15` AggregateFunction(argMax, String, DateTime), + `agg_argmax_u32_datetime_01` AggregateFunction(argMax, UInt32, DateTime), + `agg_argmax_string_datetime_16` AggregateFunction(argMax, String, DateTime), + `agg_argmax_string_u8_100` AggregateFunction(argMax, String, UInt8), + `agg_argmax_string_datetime_18` AggregateFunction(argMax, String, DateTime), + `other_max_datetime_05` SimpleAggregateFunction(max, DateTime), + `topk_Datetime_u32_u32` AggregateFunction(topKWeighted(5), UInt32, UInt32), + `agg_argmax_string_datetime_17` AggregateFunction(argMax, String, DateTime), + `other_max_datetime_09` SimpleAggregateFunction(max, DateTime), + 
`agg_count_17` AggregateFunction(count), + `agg_count_18` AggregateFunction(count), + `agg_count_19` AggregateFunction(count), + `agg_count_20` AggregateFunction(count) + ) + ENGINE = AggregatingMergeTree() + PARTITION BY toYYYYMM(dateColumn) + ORDER BY idColumnU64 + TTL dateColumn + toIntervalMonth(6) + SETTINGS index_granularity = 1024, ttl_only_drop_parts = 1, min_rows_for_wide_part = 1000000000; + + DROP TABLE interpret_table_01 + + + CREATE TABLE IF NOT EXISTS interpret_table_02 + ( + `idColumnU64` UInt64, + `dateColumn` DateTime, + `agg_uniq_u128_01` AggregateFunction(uniq, UInt128), + `agg_uniq_u128_02` AggregateFunction(uniq, UInt128), + `aggCount` AggregateFunction(count), + `agg_uniq_u128_03` AggregateFunction(uniq, UInt128), + `agg_uniq_u128_04` AggregateFunction(uniq, UInt128), + `aggCount_3` AggregateFunction(count), + `aggCount_4` AggregateFunction(count), + `agg_topk_01` AggregateFunction(topKWeighted(2), UInt128, UInt64) + ) + ENGINE = AggregatingMergeTree() + PARTITION BY toYYYYMM(dateColumn) + ORDER BY idColumnU64 + TTL dateColumn + toIntervalMonth(6) + SETTINGS index_granularity = 1024, ttl_only_drop_parts = 1; + + DROP TABLE interpret_table_02 + + + CREATE TABLE IF NOT EXISTS interpret_table_03 + ( + `idColumnU64` UInt64, + `dateColumn` Date, + `aggCount` AggregateFunction(count), + `aggCount_2` AggregateFunction(count), + `aggCount_2_shown` AggregateFunction(count), + `minDate` SimpleAggregateFunction(min, Date), + `maxDate` SimpleAggregateFunction(max, Date), + `maxInt16` SimpleAggregateFunction(max, Int16), + `minUInt16` SimpleAggregateFunction(min, UInt16), + `minUInt16_2` SimpleAggregateFunction(min, UInt16), + `aggCount_3` AggregateFunction(count), + `aggCount_4` AggregateFunction(count) + ) + ENGINE = AggregatingMergeTree() + PARTITION BY toYYYYMM(dateColumn) + ORDER BY (idColumnU64, dateColumn) + TTL dateColumn + toIntervalDay(30) + SETTINGS index_granularity = 1024, ttl_only_drop_parts = 1; + + DROP TABLE interpret_table_03 + + + CREATE TABLE IF NOT EXISTS interpret_table_04 + ( + `idColumnU64` UInt64, + `dateColumn` DateTime, + `u128_id_02` UInt128, + `ls_01` LowCardinality(String), + `agg_count_01` AggregateFunction(count), + `agg_count_02` AggregateFunction(count), + `agg_smax_datetime_01` SimpleAggregateFunction(max, DateTime), + `agg_smax_datetime_02` SimpleAggregateFunction(max, DateTime), + `agg_count_03` AggregateFunction(count), + `agg_count_04` AggregateFunction(count) + ) + ENGINE = AggregatingMergeTree() + PARTITION BY toYYYYMM(dateColumn) + ORDER BY (idColumnU64, u128_id_02, ls_01) + TTL dateColumn + toIntervalMonth(6) + SETTINGS index_granularity = 1024, ttl_only_drop_parts = 1; + + DROP TABLE interpret_table_04 + + + CREATE TABLE IF NOT EXISTS interpret_table_05 + ( + `idColumnU64` UInt64, + `dateColumn` Date, + `agg_uniq_u128_01` AggregateFunction(uniq, UInt128), + `agg_uniq_u128_02` AggregateFunction(uniq, UInt128), + `agg_uniq_u128_03` AggregateFunction(uniq, UInt128), + `agg_uniq_u128_04` AggregateFunction(uniq, UInt128), + `aggCount_3` AggregateFunction(count), + `aggCount_4` AggregateFunction(count), + `aggCount` AggregateFunction(count) + ) + ENGINE = AggregatingMergeTree() + PARTITION BY toYYYYMM(dateColumn) + ORDER BY (idColumnU64, dateColumn) + TTL dateColumn + toIntervalDay(30) + SETTINGS index_granularity = 1024, ttl_only_drop_parts = 1; + + DROP TABLE interpret_table_05 + + + CREATE TABLE IF NOT EXISTS interpret_table_06 + ( + `idColumnU64` UInt64, + `dateColumn` DateTime, + `aggCount_3` AggregateFunction(count), + `aggCount` 
AggregateFunction(count), + `sagg_max_date` SimpleAggregateFunction(max, DateTime) + ) + ENGINE = AggregatingMergeTree() + PARTITION BY toYYYYMM(dateColumn) + ORDER BY idColumnU64 + TTL dateColumn + toIntervalMonth(6) + SETTINGS index_granularity = 1024, ttl_only_drop_parts = 1; + + DROP TABLE interpret_table_06 + + + CREATE TABLE IF NOT EXISTS interpret_table_07 + ( + `idU128` UInt128, + `idU128_2` UInt128, + `idU128_3` UInt128, + `nI16` Nullable(Int16) DEFAULT CAST(NULL, 'Nullable(Int16)'), + `idColumnI64` Nullable(Int64) DEFAULT CAST(NULL, 'Nullable(Int64)'), + `nStr` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nStr_2` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nI16_02` Nullable(Int16) DEFAULT CAST(NULL, 'Nullable(Int16)'), + `nStr_3` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nStr_4` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nStr_5` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nI8_01` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_02` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_03` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_04` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_05` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_06` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nDate_01` Nullable(Date) DEFAULT CAST(NULL, 'Nullable(Date)'), + `nStr_6` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nStr_7` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nStr_8` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nStr_9` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nStr_10` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nStr_11` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nI8_07` Nullable(UInt8) DEFAULT CAST(NULL, 'Nullable(UInt8)'), + `nI8_08` Nullable(UInt8) DEFAULT CAST(NULL, 'Nullable(UInt8)'), + `Str_01` String, + `nI32_01` Nullable(Int32) DEFAULT CAST(NULL, 'Nullable(Int32)'), + `nI8_19` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_09` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_10` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_11` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_12` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_13` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_14` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nStr_12` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nStr_13` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nI8_15` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_16` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nDateTime_01` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_02` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_03` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_04` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_05` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_06` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_07` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_08` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_09` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_10` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + 
`nDateTime_11` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nDateTime_12` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nF64_01` Nullable(Float64) DEFAULT CAST(NULL, 'Nullable(Float64)'), + `nStr_14` Nullable(String) DEFAULT CAST(NULL, 'Nullable(String)'), + `nDate_02` Nullable(Date) DEFAULT CAST(NULL, 'Nullable(Date)'), + `nDateTime_13` Nullable(DateTime) DEFAULT CAST(NULL, 'Nullable(DateTime)'), + `nF64_02` Nullable(Float64) DEFAULT CAST(NULL, 'Nullable(Float64)'), + `nF64_03` Nullable(Float64) DEFAULT CAST(NULL, 'Nullable(Float64)'), + `nF64_04` Nullable(Float64) DEFAULT CAST(NULL, 'Nullable(Float64)'), + `nF64_05` Nullable(Float64) DEFAULT CAST(NULL, 'Nullable(Float64)'), + `nI8_18` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)'), + `nI8_17` Nullable(Int8) DEFAULT CAST(NULL, 'Nullable(Int8)') + ) + ENGINE = Join(ANY, LEFT, idU128); + + DROP TABLE interpret_table_07 + + + CREATE TABLE IF NOT EXISTS interpret_table_08 + ( + `idColumnU64` UInt64, + `dateColumn` Date, + `aggCount_3` AggregateFunction(count), + `aggCount_4` AggregateFunction(count) + ) + ENGINE = AggregatingMergeTree() + PARTITION BY toYYYYMM(dateColumn) + ORDER BY (idColumnU64, dateColumn) + TTL dateColumn + toIntervalDay(30) + SETTINGS index_granularity = 1024, ttl_only_drop_parts = 1; + + DROP TABLE interpret_table_08 + + + + SELECT * + FROM + ( + SELECT + cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2'), toUInt128(toUInt128('1015029'))) AS idColumnU64, + * + FROM + ( + SELECT + if(max(nDateTime_02_date_292929292) > '2020-10-31 00:00:00', max(nDateTime_02_date_292929292), NULL) AS o1, + if(max(other_max_datetime_05) > '2020-10-31 00:00:00', max(other_max_datetime_05), NULL) AS o2, + if(max(nDateTime_03_date) > '2020-10-31 00:00:00', max(nDateTime_03_date), NULL) AS o3, + if(max(nDateTime_04_date) > '2020-10-31 00:00:00', max(nDateTime_04_date), NULL) AS o4, + if(max(nDateTime_02_date) > '2020-10-31 00:00:00', max(nDateTime_02_date), NULL) AS o5, + if(max(other_max_datetime_01) > '2020-10-31 00:00:00', max(other_max_datetime_01), NULL) AS o6, + if(max(other_max_datetime_02) > '2020-10-31 00:00:00', max(other_max_datetime_02), NULL) AS o7, + argMaxMerge(agg_argmax_string_datetime_13) AS o8, + argMaxMerge(agg_argmax_string_datetime_05) AS o9, + argMaxMerge(agg_argmax_string_datetime_06) AS o10, + argMaxMerge(agg_argmax_string_datetime_02) AS o11, + argMaxMerge(agg_argmax_string_datetime_04) AS o12, + argMaxMerge(agg_argmax_string_datetime_15) AS o13, + argMaxMerge(agg_argmax_string_datetime_01) AS o14, + argMaxMerge(agg_argmax_string_u8_01) AS o15, + argMaxMerge(agg_argmax_f32_datetime_02) AS o16, + if(argMaxMerge(agg_argmax_string_datetime_09) != '', argMaxMerge(agg_argmax_string_datetime_09), NULL) AS o17, + if(argMaxMerge(agg_argmax_date_datetime_01) > '2020-10-31', argMaxMerge(agg_argmax_date_datetime_01), NULL) AS o18, + if(argMaxMerge(agg_argmax_date_datetime_02) > '2020-10-31', argMaxMerge(agg_argmax_date_datetime_02), NULL) AS o19, + argMaxMerge(agg_argmax_u8_other_02) AS o20, + argMaxMerge(agg_argmax_u8_other_03) AS o21, + argMaxMerge(agg_argmax_u8_other_04) AS o22, + argMaxMerge(agg_argmax_u8_other_01) AS o23, + argMaxMerge(agg_argmax_string_datetime_10) AS o24, + argMaxMerge(agg_argmax_string_datetime_11) AS o25, + countMerge(aggCount_3) AS o26, + countMerge(aggCount_4) AS o27 + FROM interpret_table_01 AS c + PREWHERE cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2'), toUInt128(toUInt128('1015029'))) = c.idColumnU64 + ) 
AS s01, + ( + WITH ( + SELECT coalesce(if((topKWeightedMerge(2)(agg_topk_01)[1]) != toUInt128(toUInt128('1015029')), topKWeightedMerge(2)(agg_topk_01)[1], topKWeightedMerge(2)(agg_topk_01)[2]), 0) + FROM interpret_table_02 AS c + PREWHERE cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2')) = c.idColumnU64 + ) AS other_idU128 + SELECT + if(max(other_max_datetime_05) > '2020-10-31 00:00:00', max(other_max_datetime_05), NULL) AS o28, + if(max(other_max_datetime_01) > '2020-10-31 00:00:00', max(other_max_datetime_01), NULL) AS o29, + if(max(nDateTime_02_date) > '2020-10-31 00:00:00', max(nDateTime_02_date), NULL) AS o30, + other_idU128 + FROM interpret_table_01 AS c + PREWHERE cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2'), toUInt128(other_idU128)) = c.idColumnU64 + ) AS s02, + ( + SELECT + minIf(minDate, dateColumn > (now() - toIntervalDay(7))) AS o31, + maxIf(maxDate, dateColumn > (now() - toIntervalDay(7))) AS o32, + maxIf(maxInt16, dateColumn > (now() - toIntervalDay(28))) AS o33, + countMergeIf(aggCount_3, dateColumn > (now() - toIntervalHour(24))) AS o34, + countMergeIf(aggCount_3, dateColumn > (now() - toIntervalDay(14))) AS o35, + countMergeIf(aggCount_3, dateColumn > (now() - toIntervalDay(28))) AS o36, + countMergeIf(aggCount_4, dateColumn > (now() - toIntervalHour(24))) AS o37, + countMergeIf(aggCount_4, dateColumn > (now() - toIntervalDay(7))) AS o38, + countMergeIf(aggCount_4, dateColumn > (now() - toIntervalDay(28))) AS o27_month, + countMergeIf(aggCount_2_shown, dateColumn > (now() - toIntervalDay(14))) AS o40 + FROM interpret_table_03 AS c + PREWHERE cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2'), toUInt128(toUInt128('1015029'))) = c.idColumnU64 + ) AS s03, + ( + SELECT + countMerge(agg_count_03) AS o41, + countMerge(agg_count_04) AS o42 + FROM interpret_table_04 AS c + PREWHERE (cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2'), toUInt128(toUInt128('1015029'))) = c.idColumnU64) AND (ls_01 = 'exit') + ) AS s04, + ( + SELECT + countMerge(aggCount_3) AS o43, + countMerge(aggCount_4) AS o44, + countMerge(aggCount) AS o45 + FROM interpret_table_02 AS c + PREWHERE cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2')) = c.idColumnU64 + ) AS s05, + ( + SELECT + countMergeIf(aggCount_3, dateColumn > (now() - toIntervalDay(14))) AS o46, + uniqMergeIf(agg_uniq_u128_03, dateColumn > (now() - toIntervalHour(24))) AS o47, + uniqMergeIf(agg_uniq_u128_03, dateColumn > (now() - toIntervalDay(14))) AS o48, + countMergeIf(aggCount_4, dateColumn > (now() - toIntervalDay(14))) AS o49, + countMergeIf(aggCount_4, dateColumn > (now() - toIntervalDay(28))) AS o50 + FROM interpret_table_05 AS c + PREWHERE cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2')) = c.idColumnU64 + ) AS s06, + ( + SELECT countMerge(aggCount_3) AS o51 + FROM interpret_table_06 AS c + PREWHERE cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2'), toUInt128(joinGet(interpret_table_07, 'idColumnI64', toUInt128('1015029')))) = c.idColumnU64 + ) AS s07, + ( + SELECT + countMergeIf(aggCount_3, dateColumn > (now() - toIntervalDay(28))) AS s52, + countMergeIf(aggCount_4, dateColumn > (now() - toIntervalDay(28))) AS s53 + FROM interpret_table_08 AS c + PREWHERE cityHash64('0321352416546546546546546546546', lower('BU'), lower('random2'), toUInt128(joinGet(interpret_table_07, 'idColumnI64', toUInt128('1015029')))) = c.idColumnU64 + ) AS s08 + ) AS final_s01 + 
FORMAT JSONEachRow; + + diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql index b3234e03a8f..c3cc0bbb9eb 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql @@ -39,7 +39,7 @@ RENAME TABLE test_01155_ordinary.mv1 TO test_01155_atomic.mv1; RENAME TABLE test_01155_ordinary.mv2 TO test_01155_atomic.mv2; RENAME TABLE test_01155_ordinary.dst TO test_01155_atomic.dst; RENAME TABLE test_01155_ordinary.src TO test_01155_atomic.src; -SET check_table_dependencies=0; +SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_atomic.dict depends on test_01155_ordinary.dist" in the next line. RENAME TABLE test_01155_ordinary.dist TO test_01155_atomic.dist; SET check_table_dependencies=1; RENAME DICTIONARY test_01155_ordinary.dict TO test_01155_atomic.dict; @@ -65,7 +65,9 @@ SELECT dictGet('test_01155_ordinary.dict', 'x', 'after renaming database'); SELECT database, substr(name, 1, 10) FROM system.tables WHERE database like 'test_01155_%'; -- Move tables back +SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_atomic.dict depends on test_01155_ordinary.dist" in the next line. RENAME DATABASE test_01155_ordinary TO test_01155_atomic; +SET check_table_dependencies=1; set allow_deprecated_database_ordinary=1; CREATE DATABASE test_01155_ordinary ENGINE=Ordinary; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index dd9fa7abc1b..f2c3e8eda9d 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -39,6 +39,7 @@ ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTE ALTER FETCH PARTITION ['ALTER FETCH PART','FETCH PARTITION'] TABLE ALTER TABLE ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE ALTER DATABASE SETTINGS ['ALTER DATABASE SETTING','ALTER MODIFY DATABASE SETTING','MODIFY DATABASE SETTING'] DATABASE ALTER DATABASE +ALTER NAMED COLLECTION [] \N ALTER ALTER TABLE [] \N ALTER ALTER DATABASE [] \N ALTER ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW @@ -51,12 +52,14 @@ CREATE VIEW [] VIEW CREATE CREATE DICTIONARY [] DICTIONARY CREATE CREATE TEMPORARY TABLE [] GLOBAL CREATE CREATE FUNCTION [] GLOBAL CREATE +CREATE NAMED COLLECTION [] GLOBAL CREATE CREATE [] \N ALL DROP DATABASE [] DATABASE DROP DROP TABLE [] TABLE DROP DROP VIEW [] VIEW DROP DROP DICTIONARY [] DICTIONARY DROP DROP FUNCTION [] GLOBAL DROP +DROP NAMED COLLECTION [] GLOBAL DROP DROP [] \N ALL TRUNCATE ['TRUNCATE TABLE'] TABLE ALL OPTIMIZE ['OPTIMIZE TABLE'] TABLE ALL diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference new file mode 100644 index 00000000000..c66682ca038 --- /dev/null +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference @@ -0,0 +1,21 @@ +# clickhouse-client +concatAssumeInjective: OK +ReplacingMergeTree: OK +JSONEachRow: OK +clusterAllReplicas: OK +SimpleAggregateFunction: OK +write_ahead_log_interval_ms_to_fsync: OK +max_concurrent_queries_for_all_users: OK +test_shard_localhost: OK +default_path_test: OK +default: OK +uniqCombined64ForEach: OK +system: OK +aggregate_function_combinators: OK 
+primary_key_bytes_in_memory_allocated: OK +# clickhouse-local +concatAssumeInjective: OK +ReplacingMergeTree: OK +JSONEachRow: OK +clusterAllReplicas: OK +SimpleAggregateFunction: OK diff --git a/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh similarity index 64% rename from tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh rename to tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index 1be082a6aae..056613c11b5 100755 --- a/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -5,9 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +SCRIPT_PATH="$CURDIR/$CLICKHOUSE_TEST_UNIQUE_NAME.generated-expect" + # NOTE: database = $CLICKHOUSE_DATABASE is superfluous -function test_completion_word_client() +function test_completion_word() { local w=$1 && shift @@ -15,10 +17,20 @@ function test_completion_word_client() local compword_begin=${w:0:$((w_len-3))} local compword_end=${w:$((w_len-3))} - # NOTE: here and below you should escape variables of the expect. - timeout 60s expect << EOF + # NOTE: + # - here and below you should escape variables of the expect. + # - you should not use "expect <<..." since in this case timeout/eof will + # not work (I guess due to attached stdin) + cat > "$SCRIPT_PATH" << EOF +# NOTE: log will be appended +exp_internal -f $CLICKHOUSE_TMP/$(basename "${BASH_SOURCE[0]}").debuglog 0 + +# NOTE: when expect have EOF on stdin it also closes stdout, so let's reopen it +# again for logging +set stdout_channel [open "/dev/stdout" w] + log_user 0 -set timeout 3 +set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect @@ -27,7 +39,7 @@ expect_after { timeout { exit 1 } } -spawn bash -c "$CLICKHOUSE_CLIENT_BINARY $CLICKHOUSE_CLIENT_OPT" +spawn bash -c "$*" expect ":) " # Make a query @@ -36,10 +48,12 @@ expect "SET $compword_begin" # Wait for suggestions to load, they are loaded in background set is_done 0 +set timeout 1 while {\$is_done == 0} { send -- "\\t" expect { "$compword_begin$compword_end" { + puts \$stdout_channel "$compword_begin$compword_end: OK" set is_done 1 } default { @@ -48,9 +62,18 @@ while {\$is_done == 0} { } } +close \$stdout_channel + send -- "\\3\\4" expect eof EOF + + # NOTE: run expect under timeout since there is while loop that is not + # limited with timeout. + # + # NOTE: cat is required to serialize stdout for expect (without this pipe + # it will reopen the file again, and the output will be mixed). + timeout 2m expect -f "$SCRIPT_PATH" | cat } # last 3 bytes will be completed, @@ -90,53 +113,6 @@ client_compwords_positive=( # FIXME: none ) - -function test_completion_word_local() -{ - local w=$1 && shift - - local w_len=${#w} - local compword_begin=${w:0:$((w_len-3))} - local compword_end=${w:$((w_len-3))} - - # NOTE: here and below you should escape variables of the expect. 
- timeout 60s expect << EOF -log_user 0 -set timeout 3 -match_max 100000 -expect_after { - # Do not ignore eof from expect - eof { exp_continue } - # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } -} - -spawn bash -c "$CLICKHOUSE_LOCAL" -expect ":) " - -# Make a query -send -- "SET $compword_begin" -expect "SET $compword_begin" - -# Wait for suggestions to load, they are loaded in background -set is_done 0 -while {\$is_done == 0} { - send -- "\\t" - expect { - "$compword_begin$compword_end" { - set is_done 1 - } - default { - sleep 1 - } - } -} - -send -- "\\3\\4" -expect eof -EOF -} - local_compwords_positive=( # system.functions concatAssumeInjective @@ -150,12 +126,15 @@ local_compwords_positive=( SimpleAggregateFunction ) +echo "# clickhouse-client" for w in "${client_compwords_positive[@]}"; do - test_completion_word_client "$w" || echo "[FAIL] $w (positive)" + test_completion_word "$w" "$CLICKHOUSE_CLIENT" +done +echo "# clickhouse-local" +for w in "${local_compwords_positive[@]}"; do + test_completion_word "$w" "$CLICKHOUSE_LOCAL" done -for w in "${local_compwords_positive[@]}"; do - test_completion_word_local "$w" || echo "[FAIL] $w (positive)" -done +rm -f "${SCRIPT_PATH:?}" exit 0 diff --git a/tests/queries/0_stateless/01801_s3_cluster_count.reference b/tests/queries/0_stateless/01801_s3_cluster_count.reference new file mode 100644 index 00000000000..c094c553f81 --- /dev/null +++ b/tests/queries/0_stateless/01801_s3_cluster_count.reference @@ -0,0 +1,2 @@ +12 +12 diff --git a/tests/queries/0_stateless/01801_s3_cluster_count.sql b/tests/queries/0_stateless/01801_s3_cluster_count.sql new file mode 100644 index 00000000000..8a4fb804967 --- /dev/null +++ b/tests/queries/0_stateless/01801_s3_cluster_count.sql @@ -0,0 +1,5 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +select COUNT() from s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv'); +select COUNT(*) from s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv'); diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index c206a41a03e..5033e888896 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -284,7 +284,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 
39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'SHOW NAMED COLLECTIONS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 
33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 98, 'SYSTEM DROP FILESYSTEM CACHE' = 99, 'SYSTEM DROP SCHEMA CACHE' = 100, 'SYSTEM DROP CACHE' = 101, 'SYSTEM RELOAD CONFIG' = 102, 'SYSTEM RELOAD USERS' = 103, 'SYSTEM RELOAD SYMBOLS' = 104, 'SYSTEM RELOAD DICTIONARY' = 105, 'SYSTEM RELOAD MODEL' = 106, 'SYSTEM RELOAD FUNCTION' = 107, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 108, 'SYSTEM RELOAD' = 109, 'SYSTEM RESTART DISK' = 110, 'SYSTEM MERGES' = 111, 'SYSTEM TTL MERGES' = 112, 'SYSTEM FETCHES' = 113, 'SYSTEM MOVES' = 114, 'SYSTEM DISTRIBUTED SENDS' = 115, 'SYSTEM REPLICATED SENDS' = 116, 'SYSTEM SENDS' = 117, 'SYSTEM REPLICATION QUEUES' = 118, 'SYSTEM DROP REPLICA' = 119, 'SYSTEM SYNC REPLICA' = 120, 'SYSTEM RESTART REPLICA' = 121, 'SYSTEM RESTORE REPLICA' = 122, 'SYSTEM SYNC DATABASE REPLICA' = 123, 'SYSTEM SYNC TRANSACTION LOG' = 124, 'SYSTEM FLUSH DISTRIBUTED' = 125, 'SYSTEM FLUSH LOGS' = 126, 'SYSTEM FLUSH' = 127, 'SYSTEM THREAD FUZZER' = 128, 'SYSTEM UNFREEZE' = 129, 'SYSTEM' = 130, 'dictGet' = 131, 'addressToLine' = 132, 'addressToLineWithInlines' = 133, 'addressToSymbol' = 134, 'demangle' = 135, 'INTROSPECTION' = 136, 'FILE' = 137, 'URL' = 138, 'REMOTE' = 139, 'MONGO' = 140, 'MEILISEARCH' = 141, 'MYSQL' = 142, 'POSTGRES' = 143, 'SQLITE' = 144, 'ODBC' = 145, 'JDBC' = 146, 'HDFS' = 147, 'S3' = 148, 'HIVE' = 149, 'SOURCES' = 150, 'CLUSTER' = 151, 'ALL' = 152, 'NONE' = 153), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -560,10 +560,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'SHOW NAMED COLLECTIONS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 
'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 98, 'SYSTEM DROP FILESYSTEM CACHE' = 99, 'SYSTEM DROP SCHEMA CACHE' = 100, 'SYSTEM DROP CACHE' = 101, 'SYSTEM RELOAD CONFIG' = 102, 'SYSTEM RELOAD USERS' = 103, 'SYSTEM RELOAD SYMBOLS' = 104, 'SYSTEM RELOAD DICTIONARY' = 105, 'SYSTEM RELOAD MODEL' = 106, 'SYSTEM RELOAD FUNCTION' = 107, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 108, 'SYSTEM RELOAD' = 109, 'SYSTEM RESTART DISK' = 110, 'SYSTEM MERGES' = 111, 'SYSTEM TTL MERGES' = 112, 'SYSTEM FETCHES' = 113, 'SYSTEM MOVES' = 114, 'SYSTEM DISTRIBUTED SENDS' = 115, 'SYSTEM REPLICATED SENDS' = 116, 'SYSTEM SENDS' = 117, 'SYSTEM REPLICATION QUEUES' = 118, 'SYSTEM DROP REPLICA' = 119, 'SYSTEM SYNC REPLICA' = 120, 'SYSTEM RESTART REPLICA' = 121, 'SYSTEM RESTORE REPLICA' = 122, 'SYSTEM SYNC DATABASE REPLICA' = 123, 'SYSTEM SYNC TRANSACTION LOG' = 124, 'SYSTEM FLUSH DISTRIBUTED' = 125, 'SYSTEM FLUSH LOGS' = 126, 'SYSTEM FLUSH' = 127, 'SYSTEM THREAD 
FUZZER' = 128, 'SYSTEM UNFREEZE' = 129, 'SYSTEM' = 130, 'dictGet' = 131, 'addressToLine' = 132, 'addressToLineWithInlines' = 133, 'addressToSymbol' = 134, 'demangle' = 135, 'INTROSPECTION' = 136, 'FILE' = 137, 'URL' = 138, 'REMOTE' = 139, 'MONGO' = 140, 'MEILISEARCH' = 141, 'MYSQL' = 142, 'POSTGRES' = 143, 'SQLITE' = 144, 'ODBC' = 145, 'JDBC' = 146, 'HDFS' = 147, 'S3' = 148, 'HIVE' = 149, 'SOURCES' = 150, 'CLUSTER' = 151, 'ALL' = 152, 'NONE' = 153), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'SHOW NAMED COLLECTIONS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION 
QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 98, 'SYSTEM DROP FILESYSTEM CACHE' = 99, 'SYSTEM DROP SCHEMA CACHE' = 100, 'SYSTEM DROP CACHE' = 101, 'SYSTEM RELOAD CONFIG' = 102, 'SYSTEM RELOAD USERS' = 103, 'SYSTEM RELOAD SYMBOLS' = 104, 'SYSTEM RELOAD DICTIONARY' = 105, 'SYSTEM RELOAD MODEL' = 106, 'SYSTEM RELOAD FUNCTION' = 107, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' 
= 108, 'SYSTEM RELOAD' = 109, 'SYSTEM RESTART DISK' = 110, 'SYSTEM MERGES' = 111, 'SYSTEM TTL MERGES' = 112, 'SYSTEM FETCHES' = 113, 'SYSTEM MOVES' = 114, 'SYSTEM DISTRIBUTED SENDS' = 115, 'SYSTEM REPLICATED SENDS' = 116, 'SYSTEM SENDS' = 117, 'SYSTEM REPLICATION QUEUES' = 118, 'SYSTEM DROP REPLICA' = 119, 'SYSTEM SYNC REPLICA' = 120, 'SYSTEM RESTART REPLICA' = 121, 'SYSTEM RESTORE REPLICA' = 122, 'SYSTEM SYNC DATABASE REPLICA' = 123, 'SYSTEM SYNC TRANSACTION LOG' = 124, 'SYSTEM FLUSH DISTRIBUTED' = 125, 'SYSTEM FLUSH LOGS' = 126, 'SYSTEM FLUSH' = 127, 'SYSTEM THREAD FUZZER' = 128, 'SYSTEM UNFREEZE' = 129, 'SYSTEM' = 130, 'dictGet' = 131, 'addressToLine' = 132, 'addressToLineWithInlines' = 133, 'addressToSymbol' = 134, 'demangle' = 135, 'INTROSPECTION' = 136, 'FILE' = 137, 'URL' = 138, 'REMOTE' = 139, 'MONGO' = 140, 'MEILISEARCH' = 141, 'MYSQL' = 142, 'POSTGRES' = 143, 'SQLITE' = 144, 'ODBC' = 145, 'JDBC' = 146, 'HDFS' = 147, 'S3' = 148, 'HIVE' = 149, 'SOURCES' = 150, 'CLUSTER' = 151, 'ALL' = 152, 'NONE' = 153)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' diff --git a/tests/queries/0_stateless/02241_array_first_last_or_null.reference b/tests/queries/0_stateless/02241_array_first_last_or_null.reference index 2906b04ecd0..fc4a5ff8af5 100644 --- a/tests/queries/0_stateless/02241_array_first_last_or_null.reference +++ b/tests/queries/0_stateless/02241_array_first_last_or_null.reference @@ -7,6 +7,9 @@ ArrayFirst non constant predicate \N 2 2 +ArrayFirst with Null +2 +\N ArrayLast constant predicate \N \N @@ -16,3 +19,6 @@ ArrayLast non constant predicate \N 3 3 +ArrayLast with Null +2 +\N diff --git a/tests/queries/0_stateless/02241_array_first_last_or_null.sql b/tests/queries/0_stateless/02241_array_first_last_or_null.sql index 3230e4d483a..aa8f0cdbf92 100644 --- a/tests/queries/0_stateless/02241_array_first_last_or_null.sql +++ b/tests/queries/0_stateless/02241_array_first_last_or_null.sql @@ -9,6 +9,10 @@ SELECT arrayFirstOrNull(x -> x >= 2, emptyArrayUInt8()); SELECT arrayFirstOrNull(x -> x >= 2, [1, 2, 3]); SELECT arrayFirstOrNull(x -> x >= 2, materialize([1, 2, 3])); +SELECT 'ArrayFirst with Null'; +SELECT arrayFirstOrNull((x,f) -> f, [1,2,3,NULL], [0,1,0,0]); +SELECT arrayFirstOrNull((x,f) -> f, [1,2,3,NULL], [0,0,0,1]); + SELECT 'ArrayLast constant predicate'; SELECT arrayLastOrNull(x -> 1, emptyArrayUInt8()); SELECT arrayLastOrNull(x -> 0, emptyArrayUInt8()); @@ -19,3 +23,7 @@ SELECT 'ArrayLast non constant predicate'; SELECT arrayLastOrNull(x -> x >= 2, emptyArrayUInt8()); SELECT arrayLastOrNull(x -> x >= 2, [1, 2, 3]); SELECT arrayLastOrNull(x -> x >= 2, materialize([1, 2, 3])); + +SELECT 'ArrayLast with Null'; +SELECT arrayLastOrNull((x,f) -> f, [1,2,3,NULL], [0,1,0,0]); +SELECT arrayLastOrNull((x,f) -> f, [1,2,3,NULL], [0,1,0,1]); \ No newline at end of file diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 34180020680..978f19d8381 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -82,6 +82,7 @@ addYears addressToLine addressToLineWithInlines addressToSymbol +age alphaTokens and appendTrailingCharIfAbsent diff --git a/tests/queries/0_stateless/02417_from_select_syntax.reference b/tests/queries/0_stateless/02417_from_select_syntax.reference new file mode 100644 index 00000000000..44e0be8e356 --- 
/dev/null +++ b/tests/queries/0_stateless/02417_from_select_syntax.reference @@ -0,0 +1,4 @@ +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02417_from_select_syntax.sql b/tests/queries/0_stateless/02417_from_select_syntax.sql new file mode 100644 index 00000000000..ce6cb3a14da --- /dev/null +++ b/tests/queries/0_stateless/02417_from_select_syntax.sql @@ -0,0 +1,4 @@ +FROM numbers(1) SELECT number; +WITH 1 as n FROM numbers(1) SELECT number * n; +FROM (FROM numbers(1) SELECT *) SELECT number; +FROM (FROM numbers(1) SELECT *) AS select SELECT number; diff --git a/tests/queries/0_stateless/02477_age.reference b/tests/queries/0_stateless/02477_age.reference new file mode 100644 index 00000000000..249c413d923 --- /dev/null +++ b/tests/queries/0_stateless/02477_age.reference @@ -0,0 +1,76 @@ +Various intervals +-1 +0 +0 +-7 +-3 +0 +-23 +-11 +0 +-103 +-52 +0 +-730 +-364 +1 +-17520 +-8736 +24 +-1051200 +-524160 +1440 +-63072000 +-31449600 +86400 +DateTime arguments +0 +23 +1439 +86399 +Date and DateTime arguments +-63072000 +-31449600 +86400 +Constant and non-constant arguments +-1051200 +-524160 +1440 +Case insensitive +-10 +Dependance of timezones +0 +0 +1 +25 +1500 +90000 +0 +0 +1 +24 +1440 +86400 +0 +0 +1 +25 +1500 +90000 +0 +0 +1 +24 +1440 +86400 +Additional test +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02477_age.sql b/tests/queries/0_stateless/02477_age.sql new file mode 100644 index 00000000000..9b612276b01 --- /dev/null +++ b/tests/queries/0_stateless/02477_age.sql @@ -0,0 +1,82 @@ +SELECT 'Various intervals'; + +SELECT age('year', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('year', toDate('2017-12-31'), toDate('2017-01-01')); +SELECT age('year', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('quarter', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('quarter', toDate('2017-12-31'), toDate('2017-01-01')); +SELECT age('quarter', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('month', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('month', toDate('2017-12-31'), toDate('2017-01-01')); +SELECT age('month', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('week', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('week', toDate('2017-12-31'), toDate('2017-01-01')); +SELECT age('week', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('day', toDate('2017-12-31'), toDate('2016-01-01')); +SELECT age('day', toDate('2017-12-31'), toDate('2017-01-01')); +SELECT age('day', toDate('2017-12-31'), toDate('2018-01-01')); +SELECT age('hour', toDate('2017-12-31'), toDate('2016-01-01'), 'UTC'); +SELECT age('hour', toDate('2017-12-31'), toDate('2017-01-01'), 'UTC'); +SELECT age('hour', toDate('2017-12-31'), toDate('2018-01-01'), 'UTC'); +SELECT age('minute', toDate('2017-12-31'), toDate('2016-01-01'), 'UTC'); +SELECT age('minute', toDate('2017-12-31'), toDate('2017-01-01'), 'UTC'); +SELECT age('minute', toDate('2017-12-31'), toDate('2018-01-01'), 'UTC'); +SELECT age('second', toDate('2017-12-31'), toDate('2016-01-01'), 'UTC'); +SELECT age('second', toDate('2017-12-31'), toDate('2017-01-01'), 'UTC'); +SELECT age('second', toDate('2017-12-31'), toDate('2018-01-01'), 'UTC'); + +SELECT 'DateTime arguments'; +SELECT age('day', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); +SELECT age('hour', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); +SELECT age('minute', toDateTime('2016-01-01 00:00:01', 'UTC'), 
toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); +SELECT age('second', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); + +SELECT 'Date and DateTime arguments'; + +SELECT age('second', toDate('2017-12-31'), toDateTime('2016-01-01 00:00:00', 'UTC'), 'UTC'); +SELECT age('second', toDateTime('2017-12-31 00:00:00', 'UTC'), toDate('2017-01-01'), 'UTC'); +SELECT age('second', toDateTime('2017-12-31 00:00:00', 'UTC'), toDateTime('2018-01-01 00:00:00', 'UTC')); + +SELECT 'Constant and non-constant arguments'; + +SELECT age('minute', materialize(toDate('2017-12-31')), toDate('2016-01-01'), 'UTC'); +SELECT age('minute', toDate('2017-12-31'), materialize(toDate('2017-01-01')), 'UTC'); +SELECT age('minute', materialize(toDate('2017-12-31')), materialize(toDate('2018-01-01')), 'UTC'); + +SELECT 'Case insensitive'; + +SELECT age('year', today(), today() - INTERVAL 10 YEAR); + +SELECT 'Dependance of timezones'; + +SELECT age('month', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('week', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('day', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); +SELECT age('second', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); + +SELECT age('month', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('week', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('day', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); +SELECT age('second', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); + +SELECT age('month', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('week', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('day', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('hour', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('minute', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); +SELECT age('second', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); + +SELECT age('month', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('week', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('day', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('hour', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('minute', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); +SELECT age('second', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); + +SELECT 'Additional test'; + +SELECT number = age('month', now() - INTERVAL number MONTH, now()) FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/02477_age_date32.reference b/tests/queries/0_stateless/02477_age_date32.reference new file mode 100644 
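Note on the age() tests above: judging from the reference output, age() appears to return the number of complete units elapsed between its two arguments, signed negative when the first argument is the later one (for example, age('year', toDate('2017-12-31'), toDate('2018-01-01')) yields 0 because less than a full year has passed). A minimal sketch of the assumed contrast with dateDiff(), which is believed to count crossed unit boundaries rather than complete elapsed units:

SELECT age('year', toDate('2017-12-31'), toDate('2018-01-01'));      -- 0: under one full year elapsed (per the reference above)
SELECT dateDiff('year', toDate('2017-12-31'), toDate('2018-01-01')); -- assumed 1: a year boundary is crossed
SELECT age('month', now() - INTERVAL 3 MONTH, now());                -- assumed 3: three complete months elapsed, matching the 'Additional test' pattern
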
index 00000000000..69f27a10acc --- /dev/null +++ b/tests/queries/0_stateless/02477_age_date32.reference @@ -0,0 +1,169 @@ +-- { echo } + +-- Date32 vs Date32 +SELECT age('second', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +24 +SELECT age('day', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT age('week', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +1 +SELECT age('month', toDate32('1927-01-01', 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +1 +SELECT age('year', toDate32('1927-01-01', 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); +1 +-- With DateTime64 +-- Date32 vs DateTime64 +SELECT age('second', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +24 +SELECT age('day', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT age('week', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-08 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT age('month', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-04-01 00:00:00', 3, 'UTC'), 'UTC'); +1 +SELECT age('year', toDate32('1927-01-01', 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC'), 'UTC'); +1 +-- DateTime64 vs Date32 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +24 +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT age('week', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +1 +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +1 +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); +1 +-- With DateTime +-- Date32 vs DateTime +SELECT age('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +24 +SELECT age('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +1 +SELECT age('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC'), 'UTC'); +1 +SELECT age('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC'), 'UTC'); +1 +SELECT 
age('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC'), 'UTC'); +1 +-- DateTime vs Date32 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +24 +SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +1 +SELECT age('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +1 +SELECT age('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); +1 +-- With Date +-- Date32 vs Date +SELECT age('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +24 +SELECT age('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC'), 'UTC'); +1 +SELECT age('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC'), 'UTC'); +1 +SELECT age('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC'), 'UTC'); +1 +-- Date vs Date32 +SELECT age('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +86400 +SELECT age('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1440 +SELECT age('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +24 +SELECT age('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +1 +SELECT age('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +1 +SELECT age('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +1 +SELECT age('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); +1 +-- Const vs non-const columns +SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +1 +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +1 +SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 +-- Non-const vs const columns +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDateTime64('1927-01-02 00:00:00', 
3, 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC'), 'UTC'); +1 +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); +1 +-- Non-const vs non-const columns +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +1 +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +1 diff --git a/tests/queries/0_stateless/02477_age_date32.sql b/tests/queries/0_stateless/02477_age_date32.sql new file mode 100644 index 00000000000..43ff458c2d1 --- /dev/null +++ b/tests/queries/0_stateless/02477_age_date32.sql @@ -0,0 +1,101 @@ +-- { echo } + +-- Date32 vs Date32 +SELECT age('second', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('day', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('week', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +SELECT age('month', toDate32('1927-01-01', 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +SELECT age('year', toDate32('1927-01-01', 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); + +-- With DateTime64 +-- Date32 vs DateTime64 +SELECT age('second', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('day', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('week', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-08 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('month', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-04-01 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('year', toDate32('1927-01-01', 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC'), 'UTC'); + +-- DateTime64 vs Date32 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT 
age('minute', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('week', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); +SELECT age('quarter', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); + +-- With DateTime +-- Date32 vs DateTime +SELECT age('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC'), 'UTC'); +SELECT age('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC'), 'UTC'); +SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC'), 'UTC'); +SELECT age('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC'), 'UTC'); + +-- DateTime vs Date32 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +SELECT age('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +SELECT age('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +SELECT age('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); + +-- With Date +-- Date32 vs Date +SELECT age('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC'), 'UTC'); +SELECT age('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC'), 'UTC'); +SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC'), 'UTC'); +SELECT age('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC'), 'UTC'); + +-- Date vs Date32 +SELECT age('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 
'UTC'); +SELECT age('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); +SELECT age('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); +SELECT age('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); +SELECT age('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); + +-- Const vs non-const columns +SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); + +-- Non-const vs const columns +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); +SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC'), 'UTC'); +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); + +-- Non-const vs non-const columns +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); +SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference new file mode 100644 index 00000000000..3b4459dd26d --- /dev/null +++ b/tests/queries/0_stateless/02477_age_datetime64.reference @@ -0,0 +1,113 @@ +-- { echo } + +-- DateTime64 vs DateTime64 same scale +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); +10 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); +600 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:00:00', 0, 'UTC')); +3600 +SELECT 
age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:10:10', 0, 'UTC')); +4210 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); +10 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); +600 +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); +10 +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-02 00:00:00', 0, 'UTC')); +1 +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-02-01 00:00:00', 0, 'UTC')); +1 +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1928-01-01 00:00:00', 0, 'UTC')); +1 +-- DateTime64 vs DateTime64 different scale +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); +10 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); +600 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:00:00', 3, 'UTC')); +3600 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:10:10', 3, 'UTC')); +4210 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); +10 +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); +600 +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); +10 +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC')); +1 +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC')); +1 +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC')); +1 +-- With DateTime +-- DateTime64 vs DateTime +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:00', 'UTC')); +0 +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:10', 'UTC')); +10 +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:10:00', 'UTC')); +600 +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:00:00', 'UTC')); +3600 +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:10:10', 'UTC')); +4210 +-- DateTime vs DateTime64 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:00', 3, 'UTC')); +0 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); +10 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:10:00', 3, 'UTC')); +600 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:00:00', 3, 'UTC')); +3600 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:10:10', 3, 'UTC')); +4210 +-- With Date +-- DateTime64 vs Date +SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDate('2015-08-19', 'UTC')); +1 +-- Date vs DateTime64 +SELECT age('day', toDate('2015-08-18', 'UTC'), toDateTime64('2015-08-19 
00:00:00', 3, 'UTC')); +1 +-- Same thing but const vs non-const columns +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); +10 +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); +10 +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); +10 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); +10 +SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); +1 +SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); +1 +-- Same thing but non-const vs const columns +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); +10 +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); +10 +SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDateTime('2015-08-18 00:00:10', 'UTC')); +10 +SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); +10 +SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDate('2015-08-19', 'UTC')); +1 +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); +1 +-- Same thing but non-const vs non-const columns +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); +10 +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); +10 +SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); +10 +SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); +10 +SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); +1 +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); +1 diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql new file mode 100644 index 00000000000..1bed93991ca --- /dev/null +++ b/tests/queries/0_stateless/02477_age_datetime64.sql @@ -0,0 +1,77 @@ +-- { echo } + +-- DateTime64 vs DateTime64 same scale +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:00:00', 0, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:10:10', 0, 'UTC')); + +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); + +SELECT age('hour', 
toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); + +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-02 00:00:00', 0, 'UTC')); +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-02-01 00:00:00', 0, 'UTC')); +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1928-01-01 00:00:00', 0, 'UTC')); + +-- DateTime64 vs DateTime64 different scale +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:00:00', 3, 'UTC')); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:10:10', 3, 'UTC')); + +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); +SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); + +SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); + +SELECT age('day', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC')); +SELECT age('month', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC')); +SELECT age('year', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC')); + +-- With DateTime +-- DateTime64 vs DateTime +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:00', 'UTC')); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:10', 'UTC')); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:10:00', 'UTC')); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:00:00', 'UTC')); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:10:10', 'UTC')); + +-- DateTime vs DateTime64 +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:00', 3, 'UTC')); +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:10:00', 3, 'UTC')); +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:00:00', 3, 'UTC')); +SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:10:10', 3, 'UTC')); + +-- With Date +-- DateTime64 vs Date +SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDate('2015-08-19', 'UTC')); + +-- Date vs DateTime64 +SELECT age('day', toDate('2015-08-18', 'UTC'), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); + +-- Same thing but const vs non-const columns +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); +SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); +SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); +SELECT 
age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); +SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); +SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); + +-- Same thing but non-const vs const columns +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); +SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDateTime('2015-08-18 00:00:10', 'UTC')); +SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); +SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDate('2015-08-19', 'UTC')); +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); + +-- Same thing but non-const vs non-const columns +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); +SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); +SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); +SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); +SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); +SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); diff --git a/tests/queries/0_stateless/02480_tets_show_full.reference b/tests/queries/0_stateless/02480_tets_show_full.reference new file mode 100644 index 00000000000..75a3f5d95fa --- /dev/null +++ b/tests/queries/0_stateless/02480_tets_show_full.reference @@ -0,0 +1,2 @@ +test_02480_table MergeTree +test_02480_view View diff --git a/tests/queries/0_stateless/02480_tets_show_full.sh b/tests/queries/0_stateless/02480_tets_show_full.sh new file mode 100755 index 00000000000..5f5040ba128 --- /dev/null +++ b/tests/queries/0_stateless/02480_tets_show_full.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +database=$($CLICKHOUSE_CLIENT -q 'SELECT currentDatabase()') + +$CLICKHOUSE_CLIENT -nm -q " +DROP TABLE IF EXISTS test_02480_table; +DROP VIEW IF EXISTS test_02480_view; +CREATE TABLE test_02480_table (id Int64) ENGINE=MergeTree ORDER BY id; +CREATE VIEW test_02480_view AS SELECT * FROM test_02480_table; +SHOW FULL TABLES FROM $database LIKE '%'; +DROP TABLE IF EXISTS test_02480_table; +DROP VIEW IF EXISTS test_02480_view; +" diff --git a/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference b/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference new file mode 100644 index 00000000000..ea266a98c7e --- /dev/null +++ b/tests/queries/0_stateless/02481_aggregation_in_order_plan.reference @@ -0,0 +1,8 @@ +0 1 1 20 +0 1 1 200 +0 1 2 20 +0 1 2 200 + Aggregating + Order: a ASC, c ASC + ReadFromMergeTree (default.tab) + Sorting (Stream): a ASC, b ASC, c ASC diff --git a/tests/queries/0_stateless/02481_aggregation_in_order_plan.sql b/tests/queries/0_stateless/02481_aggregation_in_order_plan.sql new file mode 100644 index 00000000000..1568e44dbd5 --- /dev/null +++ b/tests/queries/0_stateless/02481_aggregation_in_order_plan.sql @@ -0,0 +1,7 @@ +create table tab (a Int32, b Int32, c Int32, d Int32) engine = MergeTree order by (a, b, c); + +insert into tab select 0, number % 3, 2 - intDiv(number, 3), (number % 3 + 1) * 10 from numbers(6); +insert into tab select 0, number % 3, 2 - intDiv(number, 3), (number % 3 + 1) * 100 from numbers(6); + +select a, any(b), c, d from tab where b = 1 group by a, c, d order by c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1; +select * from (explain actions = 1, sorting=1 select a, any(b), c, d from tab where b = 1 group by a, c, d settings optimize_aggregation_in_order=1, query_plan_aggregation_in_order=1) where explain like '%Sorting (Stream)%' or explain like '%ReadFromMergeTree%' or explain like '%Aggregating%' or explain like '%Order:%'; diff --git a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference new file mode 100644 index 00000000000..dd677873c7c --- /dev/null +++ b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.reference @@ -0,0 +1,117 @@ +-- { echoOn } +SELECT + cutURLParameter('http://bigmir.net/?a=b&c=d', []), + cutURLParameter('http://bigmir.net/?a=b&c=d', ['a']), + cutURLParameter('http://bigmir.net/?a=b&c=d', ['a', 'c']), + cutURLParameter('http://bigmir.net/?a=b&c=d', ['c']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e=f', ['a', 'e']), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', ['c', 'e']), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', ['e']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', ['b', 'g']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e', ['a', 'e']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['c', 'g']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['e', 'g']), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'e']), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'g']), + cutURLParameter('//bigmir.net/?a=b&c=d', []), + cutURLParameter('//bigmir.net/?a=b&c=d', ['a']), + cutURLParameter('//bigmir.net/?a=b&c=d', ['a', 'c']), + cutURLParameter('//bigmir.net/?a=b&c=d#e=f', ['a', 'e']), + cutURLParameter('//bigmir.net/?a&c=d#e=f', ['a']), + cutURLParameter('//bigmir.net/?a&c=d#e=f', ['a', 'c']), + cutURLParameter('//bigmir.net/?a&c=d#e=f', ['a', 'e']), + 
cutURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', ['c', 'g']), + cutURLParameter('//bigmir.net/?a=b&c=d#e', ['a', 'c']), + cutURLParameter('//bigmir.net/?a=b&c=d#e', ['a', 'e']), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', ['c', 'e']), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', ['e', 'g']), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'e']), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'g']) + FORMAT Vertical; +Row 1: +────── +cutURLParameter('http://bigmir.net/?a=b&c=d', []): http://bigmir.net/?a=b&c=d +cutURLParameter('http://bigmir.net/?a=b&c=d', ['a']): http://bigmir.net/?c=d +cutURLParameter('http://bigmir.net/?a=b&c=d', ['a', 'c']): http://bigmir.net/? +cutURLParameter('http://bigmir.net/?a=b&c=d', ['c']): http://bigmir.net/?a=b +cutURLParameter('http://bigmir.net/?a=b&c=d#e=f', ['a', 'e']): http://bigmir.net/?c=d# +cutURLParameter('http://bigmir.net/?a&c=d#e=f', ['c', 'e']): http://bigmir.net/?a# +cutURLParameter('http://bigmir.net/?a&c=d#e=f', ['e']): http://bigmir.net/?a&c=d# +cutURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', ['b', 'g']): http://bigmir.net/?a=b&c=d#e=f +cutURLParameter('http://bigmir.net/?a=b&c=d#e', ['a', 'e']): http://bigmir.net/?c=d#e +cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['c', 'g']): http://bigmir.net/?a=b#e +cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['e', 'g']): http://bigmir.net/?a=b&c=d#e +cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'e']): http://bigmir.net/?a=b&c=d#test?g=h +cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'g']): http://bigmir.net/?a=b&c=d#test?e=f +cutURLParameter('//bigmir.net/?a=b&c=d', []): //bigmir.net/?a=b&c=d +cutURLParameter('//bigmir.net/?a=b&c=d', ['a']): //bigmir.net/?c=d +cutURLParameter('//bigmir.net/?a=b&c=d', ['a', 'c']): //bigmir.net/? 
+cutURLParameter('//bigmir.net/?a=b&c=d#e=f', ['a', 'e']): //bigmir.net/?c=d# +cutURLParameter('//bigmir.net/?a&c=d#e=f', ['a']): //bigmir.net/?a&c=d#e=f +cutURLParameter('//bigmir.net/?a&c=d#e=f', ['a', 'c']): //bigmir.net/?a#e=f +cutURLParameter('//bigmir.net/?a&c=d#e=f', ['a', 'e']): //bigmir.net/?a&c=d# +cutURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', ['c', 'g']): //bigmir.net/?a=b#e=f +cutURLParameter('//bigmir.net/?a=b&c=d#e', ['a', 'c']): //bigmir.net/?#e +cutURLParameter('//bigmir.net/?a=b&c=d#e', ['a', 'e']): //bigmir.net/?c=d#e +cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', ['c', 'e']): //bigmir.net/?a=b#e&g=h +cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', ['e', 'g']): //bigmir.net/?a=b&c=d#e +cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'e']): //bigmir.net/?a=b&c=d#test?g=h +cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'g']): //bigmir.net/?a=b&c=d#test?e=f +SELECT + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), []), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a', 'c']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['c']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), ['a', 'e']), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), ['c', 'e']), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), ['e']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), ['b', 'g']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), ['a', 'e']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['c', 'g']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['e', 'g']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'e']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'g']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), []), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a', 'c']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), ['a', 'e']), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), ['a']), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), ['a', 'c']), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), ['a', 'e']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), ['c', 'g']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), ['a', 'c']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), ['a', 'e']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), ['c', 'e']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), ['e', 'g']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'e']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'g']) + FORMAT Vertical; +Row 1: +────── +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), []): http://bigmir.net/?a=b&c=d +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a']): http://bigmir.net/?c=d +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a', 'c']): http://bigmir.net/? 
+cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['c']): http://bigmir.net/?a=b +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), ['a', 'e']): http://bigmir.net/?c=d# +cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), ['c', 'e']): http://bigmir.net/?a# +cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), ['e']): http://bigmir.net/?a&c=d# +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), ['b', 'g']): http://bigmir.net/?a=b&c=d#e=f +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), ['a', 'e']): http://bigmir.net/?c=d#e +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['c', 'g']): http://bigmir.net/?a=b#e +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['e', 'g']): http://bigmir.net/?a=b&c=d#e +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'e']): http://bigmir.net/?a=b&c=d#test?g=h +cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'g']): http://bigmir.net/?a=b&c=d#test?e=f +cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), []): //bigmir.net/?a=b&c=d +cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a']): //bigmir.net/?c=d +cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a', 'c']): //bigmir.net/? +cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), ['a', 'e']): //bigmir.net/?c=d# +cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), ['a']): //bigmir.net/?a&c=d#e=f +cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), ['a', 'c']): //bigmir.net/?a#e=f +cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), ['a', 'e']): //bigmir.net/?a&c=d# +cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), ['c', 'g']): //bigmir.net/?a=b#e=f +cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), ['a', 'c']): //bigmir.net/?#e +cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), ['a', 'e']): //bigmir.net/?c=d#e +cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), ['c', 'e']): //bigmir.net/?a=b#e&g=h +cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), ['e', 'g']): //bigmir.net/?a=b&c=d#e +cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'e']): //bigmir.net/?a=b&c=d#test?g=h +cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'g']): //bigmir.net/?a=b&c=d#test?e=f diff --git a/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql new file mode 100644 index 00000000000..ea2d6ae104f --- /dev/null +++ b/tests/queries/0_stateless/02483_cuturlparameter_with_arrays.sql @@ -0,0 +1,61 @@ +-- { echoOn } +SELECT + cutURLParameter('http://bigmir.net/?a=b&c=d', []), + cutURLParameter('http://bigmir.net/?a=b&c=d', ['a']), + cutURLParameter('http://bigmir.net/?a=b&c=d', ['a', 'c']), + cutURLParameter('http://bigmir.net/?a=b&c=d', ['c']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e=f', ['a', 'e']), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', ['c', 'e']), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', ['e']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', ['b', 'g']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e', ['a', 'e']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['c', 'g']), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', ['e', 'g']), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'e']), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'g']), + 
cutURLParameter('//bigmir.net/?a=b&c=d', []), + cutURLParameter('//bigmir.net/?a=b&c=d', ['a']), + cutURLParameter('//bigmir.net/?a=b&c=d', ['a', 'c']), + cutURLParameter('//bigmir.net/?a=b&c=d#e=f', ['a', 'e']), + cutURLParameter('//bigmir.net/?a&c=d#e=f', ['a']), + cutURLParameter('//bigmir.net/?a&c=d#e=f', ['a', 'c']), + cutURLParameter('//bigmir.net/?a&c=d#e=f', ['a', 'e']), + cutURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', ['c', 'g']), + cutURLParameter('//bigmir.net/?a=b&c=d#e', ['a', 'c']), + cutURLParameter('//bigmir.net/?a=b&c=d#e', ['a', 'e']), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', ['c', 'e']), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', ['e', 'g']), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'e']), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', ['test', 'g']) + FORMAT Vertical; + +SELECT + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), []), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['a', 'c']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), ['c']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), ['a', 'e']), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), ['c', 'e']), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), ['e']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), ['b', 'g']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), ['a', 'e']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['c', 'g']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), ['e', 'g']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'e']), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'g']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), []), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), ['a', 'c']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), ['a', 'e']), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), ['a']), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), ['a', 'c']), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), ['a', 'e']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), ['c', 'g']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), ['a', 'c']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), ['a', 'e']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), ['c', 'e']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), ['e', 'g']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'e']), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), ['test', 'g']) + FORMAT Vertical; +-- { echoOff } diff --git a/tests/queries/0_stateless/02494_analyzer_cte_resolution_in_subquery_fix.reference b/tests/queries/0_stateless/02494_analyzer_cte_resolution_in_subquery_fix.reference new file mode 100644 index 00000000000..09373d75f5d --- /dev/null +++ b/tests/queries/0_stateless/02494_analyzer_cte_resolution_in_subquery_fix.reference @@ -0,0 +1,4 @@ +0 0 0 +0 0 0 +0 0 +0 0 diff --git a/tests/queries/0_stateless/02494_analyzer_cte_resolution_in_subquery_fix.sql b/tests/queries/0_stateless/02494_analyzer_cte_resolution_in_subquery_fix.sql new file mode 100644 index 00000000000..de7c7242b90 --- /dev/null +++ 
b/tests/queries/0_stateless/02494_analyzer_cte_resolution_in_subquery_fix.sql @@ -0,0 +1,9 @@ +WITH a AS (SELECT t1.number AS n1, t2.number AS n2 FROM numbers(1) AS t1, numbers(1) AS t2), b AS (SELECT sum(n1) AS s FROM a) +SELECT * FROM b AS l, a AS r; + +WITH a AS (SELECT t1.number AS n1, t2.number AS n2 FROM numbers(1) AS t1, numbers(1) AS t2), b AS (SELECT sum(n1) AS s FROM a) +SELECT * FROM b AS l, a AS r; + +WITH a AS (SELECT number FROM numbers(1)), b AS (SELECT number FROM a) SELECT * FROM b as l, a as r; + +WITH a AS (SELECT number FROM numbers(1)), b AS (SELECT number FROM a) SELECT * FROM a as l, b as r; diff --git a/tests/queries/0_stateless/02494_trace_log_profile_events.reference b/tests/queries/0_stateless/02494_trace_log_profile_events.reference new file mode 100644 index 00000000000..cd121fd3feb --- /dev/null +++ b/tests/queries/0_stateless/02494_trace_log_profile_events.reference @@ -0,0 +1,2 @@ +1 +1 1 diff --git a/tests/queries/0_stateless/02494_trace_log_profile_events.sh b/tests/queries/0_stateless/02494_trace_log_profile_events.sh new file mode 100755 index 00000000000..4dd0a34d202 --- /dev/null +++ b/tests/queries/0_stateless/02494_trace_log_profile_events.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +query_id="$RANDOM-$CLICKHOUSE_DATABASE" +${CLICKHOUSE_CLIENT} --query_id $query_id --query "SELECT 1 FORMAT Null SETTINGS trace_profile_events = 0" + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} --query "SELECT count() = 0 FROM system.trace_log WHERE query_id = '$query_id' AND trace_type = 'ProfileEvent'" + +query_id="$RANDOM-$CLICKHOUSE_DATABASE" +${CLICKHOUSE_CLIENT} --query_id $query_id --query "SELECT 1 FORMAT Null SETTINGS trace_profile_events = 1" + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} --query "SELECT count() > 0, sum(empty(trace)) = 0 FROM system.trace_log WHERE query_id = '$query_id' AND trace_type = 'ProfileEvent'" diff --git a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.reference b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.reference new file mode 100644 index 00000000000..726e74146fc --- /dev/null +++ b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.reference @@ -0,0 +1,4 @@ +199 +199 +1990 199 +1990 199 diff --git a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql new file mode 100644 index 00000000000..7a51d86dd30 --- /dev/null +++ b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql @@ -0,0 +1,79 @@ +DROP TABLE IF EXISTS wikistat1; +DROP TABLE IF EXISTS wikistat2; + +CREATE TABLE wikistat1 +( + time DateTime, + project LowCardinality(String), + subproject LowCardinality(String), + path String, + hits UInt64, + PROJECTION total + ( + SELECT + project, + subproject, + path, + sum(hits), + count() + GROUP BY + project, + subproject, + path + ) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02494_zero_copy_and_projection', '1') +ORDER BY (path, time) +SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, allow_remote_fs_zero_copy_replication=1, min_bytes_for_wide_part=0; + +CREATE TABLE wikistat2 +( + time 
DateTime, + project LowCardinality(String), + subproject LowCardinality(String), + path String, + hits UInt64, + PROJECTION total + ( + SELECT + project, + subproject, + path, + sum(hits), + count() + GROUP BY + project, + subproject, + path + ) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02494_zero_copy_and_projection', '2') +ORDER BY (path, time) +SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0, allow_remote_fs_zero_copy_replication=1, min_bytes_for_wide_part=0; + +INSERT INTO wikistat1 SELECT toDateTime('2020-10-01 00:00:00'), 'hello', 'world', '/data/path', 10 from numbers(100); + +INSERT INTO wikistat1 SELECT toDateTime('2020-10-01 00:00:00'), 'hello', 'world', '/data/path', 10 from numbers(99, 99); + +SYSTEM SYNC REPLICA wikistat2; + +SELECT COUNT() from wikistat1 WHERE NOT ignore(*); +SELECT COUNT() from wikistat2 WHERE NOT ignore(*); + +SYSTEM STOP REPLICATION QUEUES wikistat2; + +ALTER TABLE wikistat1 DELETE where time = toDateTime('2022-12-20 00:00:00') SETTINGS mutations_sync = 1; + +SYSTEM START REPLICATION QUEUES wikistat2; + +SYSTEM SYNC REPLICA wikistat2; + +-- This doesn't make the test flaky: in rare cases the parts will not be deleted because the cleanup thread was slow, +-- and such a condition still leads to successful queries. +SELECT 0 FROM numbers(5) WHERE sleepEachRow(1) = 1; + +select sum(hits), count() from wikistat1 GROUP BY project, subproject, path settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +select sum(hits), count() from wikistat2 GROUP BY project, subproject, path settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; + +DROP TABLE wikistat1; +DROP TABLE wikistat2; diff --git a/tests/queries/0_stateless/02496_row_binary_large_string_size.reference b/tests/queries/0_stateless/02496_row_binary_large_string_size.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02496_row_binary_large_string_size.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02496_row_binary_large_string_size.sh b/tests/queries/0_stateless/02496_row_binary_large_string_size.sh new file mode 100755 index 00000000000..39f83f6c2b8 --- /dev/null +++ b/tests/queries/0_stateless/02496_row_binary_large_string_size.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +printf '\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff' | $CLICKHOUSE_LOCAL --format_binary_max_string_size=100000 --input-format=RowBinary --structure='s String' -q "select * from table" 2>&1 | grep -q -F "TOO_LARGE_STRING_SIZE" && echo "OK" || echo "FAIL" diff --git a/utils/check-style/check-mypy b/utils/check-style/check-mypy new file mode 100755 index 00000000000..42cb7fbbd15 --- /dev/null +++ b/utils/check-style/check-mypy @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# mypy supports pyproject.toml, but unfortunately it doesn't support it recursively +# https://github.com/python/mypy/issues/10613 +# +# Until that is fixed, mypy only runs against tests/ci +# Let's leave room for improvement here and redo it when mypy tests anything else + +GIT_ROOT=$(git rev-parse --show-cdup) +GIT_ROOT=${GIT_ROOT:-.} +CONFIG="$GIT_ROOT/tests/ci/.mypy.ini" +DIRS=("$GIT_ROOT/tests/ci/" "$GIT_ROOT/tests/ci/"*/) +tmp=$(mktemp) +for dir in "${DIRS[@]}"; do + if ! compgen -G "$dir"/*.py > /dev/null; then + continue + fi + if !
mypy --config-file="$CONFIG" --sqlite-cache "$dir"/*.py > "$tmp" 2>&1; then + echo "Errors while processing $dir:" + cat "$tmp" + fi +done +rm -f "$tmp" diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 2c1061c3333..0793f6c816f 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,10 +1,16 @@ +v22.11.2.30-stable 2022-12-02 v22.11.1.1360-stable 2022-11-17 +v22.10.4.23-stable 2022-12-02 +v22.10.3.27-stable 2022-11-23 v22.10.2.11-stable 2022-11-01 v22.10.1.1877-stable 2022-10-26 +v22.9.6.20-stable 2022-12-02 +v22.9.5.25-stable 2022-11-23 v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.10.29-lts 2022-12-02 v22.8.9.24-lts 2022-11-19 v22.8.8.3-lts 2022-10-27 v22.8.7.34-lts 2022-10-26 @@ -39,6 +45,7 @@ v22.4.5.9-stable 2022-05-06 v22.4.4.7-stable 2022-04-29 v22.4.3.3-stable 2022-04-26 v22.4.2.1-stable 2022-04-22 +v22.3.15.33-lts 2022-12-02 v22.3.14.23-lts 2022-10-28 v22.3.13.80-lts 2022-09-30 v22.3.12.19-lts 2022-08-29 diff --git a/utils/security-generator/generate_security.py b/utils/security-generator/generate_security.py index 4835a60d152..d25612e8bc6 100755 --- a/utils/security-generator/generate_security.py +++ b/utils/security-generator/generate_security.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from pathlib import Path from typing import List