diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 6b6a02bb55b..72adba5d762 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -48,8 +48,9 @@ then mkdir /output/ch git -C /output/ch init --bare git -C /output/ch remote add origin /build - git -C /output/ch fetch --no-tags --depth 50 origin HEAD - git -C /output/ch reset --soft FETCH_HEAD + git -C /output/ch fetch --no-tags --depth 50 origin HEAD:pr + git -C /output/ch fetch --no-tags --depth 50 origin master:master + git -C /output/ch reset --soft pr git -C /output/ch log -5 fi diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index e1a7385ff50..61ca36a7ff5 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -72,7 +72,7 @@ function watchdog function fuzz { - ./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log & + ./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -10000 > server.log & server_pid=$! kill -0 $server_pid while ! ./clickhouse-client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done @@ -83,7 +83,7 @@ function fuzz fuzzer_exit_code=0 ./clickhouse-client --query-fuzzer-runs=1000 \ < <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \ - > >(tail -100000 > fuzzer.log) \ + > >(tail -10000 > fuzzer.log) \ 2>&1 \ || fuzzer_exit_code=$? @@ -160,7 +160,7 @@ case "$stage" in echo "success" > status.txt else echo "failure" > status.txt - if ! grep -a "received signal \|Logical error" server.log > description.txt + if ! grep -a "Received signal \|Logical error" server.log > description.txt then echo "Fuzzer exit code $fuzzer_exit_code. See the logs" > description.txt fi diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 378e87f443b..9e1780f247a 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -317,9 +317,11 @@ create view right_query_log as select * '$(cat "right-query-log.tsv.columns")'); create view query_logs as - select *, 0 version from left_query_log + select 0 version, query_id, ProfileEvents.Names, ProfileEvents.Values, + query_duration_ms from left_query_log union all - select *, 1 version from right_query_log + select 1 version, query_id, ProfileEvents.Names, ProfileEvents.Values, + query_duration_ms from right_query_log ; -- This is a single source of truth on all metrics we have for query runs. The diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index 93888a9ab02..6cbfb77b290 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -6,7 +6,6 @@ trap 'kill $(jobs -pr) ||:' EXIT mkdir db0 ||: mkdir left ||: -mkdir right ||: left_pr=$1 left_sha=$2 @@ -24,7 +23,7 @@ dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_e function download { - # Historically there were various path for the performance test package. + # Historically there were various paths for the performance test package. # Test all of them. 
for path in "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/"{,clickhouse_build_check/}"performance/performance.tgz" do @@ -34,22 +33,13 @@ function download fi done - for path in "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/"{,clickhouse_build_check/}"performance/performance.tgz" - do - if curl --fail --head "$path" - then - right_path="$path" - fi - done - - # might have the same version on left and right - if ! [ "$left_path" = "$right_path" ] + # Might have the same version on left and right (for testing). + if ! [ "$left_sha" = "$right_sha" ] then wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv & - wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv & else - mkdir right ||: - wget -nv -nd -c "$left_path" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right & + mkdir left ||: + cp -a right/* left & fi for dataset_name in $datasets diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 64336d0a038..19b752570ed 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -1,38 +1,23 @@ #!/bin/bash set -ex -chown nobody workspace output -chgrp nogroup workspace output -chmod 777 workspace output - -cd workspace - -# Fetch the repository to find and describe the compared revisions. -rm -rf ch ||: -time git clone --depth 50 --bare https://github.com/ClickHouse/ClickHouse ch -git -C ch fetch origin "$SHA_TO_TEST" - +# Use the packaged repository to find the revision we will compare to. function find_reference_sha { - # If not master, try to fetch pull/.../{head,merge} - if [ "$PR_TO_TEST" != "0" ] - then - git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" - fi - # Go back from the revision to be tested, trying to find the closest published - # testing release. - start_ref="$SHA_TO_TEST"~ - # If we are testing a PR, and it merges with master successfully, we are - # building and testing not the nominal last SHA specified by pull/.../head - # and SHA_TO_TEST, but a revision that is merged with recent master, given - # by pull/.../merge ref. - # Master is the first parent of the pull/.../merge. - if git -C ch rev-parse "pull/$PR_TO_TEST/merge" + # testing release. The PR branch may be either pull/*/head which is the + # author's branch, or pull/*/merge, which is head merged with some master + # automatically by Github. We will use a merge base with master as a reference + # for tesing (or some older commit). A caveat is that if we're testing the + # master, the merge base is the tested commit itself, so we have to step back + # once. + start_ref=$(git -C ch merge-base origin/master pr) + if [ "PR_TO_TEST" == "0" ] then - start_ref="pull/$PR_TO_TEST/merge~" + start_ref=$start_ref~ fi + # Loop back to find a commit that actually has a published perf test package. while : do # FIXME the original idea was to compare to a closest testing tag, which @@ -48,10 +33,10 @@ function find_reference_sha # dereference the tag to get the commit it points to, hence the '~0' thing. REF_SHA=$(git -C ch rev-parse "$ref_tag~0") - # FIXME sometimes we have testing tags on commits without published builds -- - # normally these are documentation commits. Loop to skip them. - # Historically there were various path for the performance test package. - # Test all of them. + # FIXME sometimes we have testing tags on commits without published builds. 
+ # Normally these are documentation commits. Loop to skip them. + # Historically there were various paths for the performance test package, + # test all of them. unset found for path in "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/"{,clickhouse_build_check/}"performance/performance.tgz" do @@ -69,6 +54,24 @@ function find_reference_sha REF_PR=0 } +chown nobody workspace output +chgrp nogroup workspace output +chmod 777 workspace output + +cd workspace + +# Download the package for the version we are going to test +for path in "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/"{,clickhouse_build_check/}"performance/performance.tgz" +do + if curl --fail --head "$path" + then + right_path="$path" + fi +done + +mkdir right +wget -nv -nd -c "$right_path" -O- | tar -C right --strip-components=1 -zxv + # Find reference revision if not specified explicitly if [ "$REF_SHA" == "" ]; then find_reference_sha; fi if [ "$REF_SHA" == "" ]; then echo Reference SHA is not specified ; exit 1 ; fi diff --git a/docs/_description_templates/template-statement.md b/docs/_description_templates/template-statement.md new file mode 100644 index 00000000000..62ea51edf83 --- /dev/null +++ b/docs/_description_templates/template-statement.md @@ -0,0 +1,24 @@ +# Statement name (for example, SHOW USER) + +Brief description of what the statement does. + +Syntax: + +```sql +Syntax of the statement. +``` + +## Other necessary sections of the description (Optional) + +Examples of descriptions with a complicated structure: + +- https://clickhouse.tech/docs/en/sql-reference/statements/grant/ +- https://clickhouse.tech/docs/en/sql-reference/statements/revoke/ +- https://clickhouse.tech/docs/en/sql-reference/statements/select/join/ + + +## See Also (Optional) + +Links to related topics as a list. + +- [link](#) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 875217fdc49..bcbbc6ca237 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -1,3 +1,36 @@ ## system.asynchronous\_metric\_log {#system-tables-async-log} -Contains the historical values for `system.asynchronous_log` (see [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)) +Contains the historical values for `system.asynchronous_metrics`, which are saved once per minute. This feature is enabled by default. + +Columns: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. +- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
+ +**Example** + +``` sql +SELECT * FROM system.asynchronous_metric_log LIMIT 10 +``` + +``` text +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬────value─┐ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pmuzzy │ 0 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pdirty │ 4214 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.retained │ 17657856 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.mapped │ 71471104 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.resident │ 61538304 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.metadata │ 6199264 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.allocated │ 38074336 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.epoch │ 2 │ +└────────────┴─────────────────────┴──────────────────────────────────────────┴──────────┘ +``` + +**See Also** + +- [system.asynchronous\_metrics](../system-tables/asynchronous_metrics.md) — Contains metrics that are calculated periodically in the background. +- [system.metric_log](../system-tables/metric_log.md) — Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index 96c7e3c5319..3f3974685bd 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -1,4 +1,4 @@ -# system.asynchronous\_metrics {#system_tables-asynchronous_metrics} +# system.asynchronous_metrics {#system_tables-asynchronous_metrics} Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use. diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md index ff15940d252..91d8553683e 100644 --- a/docs/en/operations/system-tables/metric_log.md +++ b/docs/en/operations/system-tables/metric_log.md @@ -49,7 +49,7 @@ CurrentMetric_ReplicatedChecks: 0 **See also** -- [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred. -- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics. +- [system.events](../../operations/system-tables/events.md) — Contains a number of events that occurred. +- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index c32643661d3..132f50fa24e 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -23,7 +23,8 @@ For temporary data an unique temporary data directory is created by default.
If Basic usage: ``` bash -$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query" +$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" \ + --query "query" ``` Arguments: @@ -46,7 +47,8 @@ Also there are arguments for each ClickHouse configuration variable which are mo ## Examples {#examples} ``` bash -$ echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table" +$ echo -e "1,2\n3,4" | clickhouse-local --structure "a Int64, b Int64" \ + --input-format "CSV" --query "SELECT * FROM table" Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec. 1 2 3 4 @@ -55,16 +57,37 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec. Previous example is the same as: ``` bash -$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table" +$ echo -e "1,2\n3,4" | clickhouse-local --query " + CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); + SELECT a, b FROM table; + DROP TABLE table" Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec. 1 2 3 4 ``` +You don't have to use `stdin` or the `--file` argument; you can open any number of files using the [`file` table function](../../sql-reference/table-functions/file.md): + +``` bash +$ echo 1 | tee 1.tsv +1 + +$ echo 2 | tee 2.tsv +2 + +$ clickhouse-local --query " + select * from file('1.tsv', TSV, 'a int') t1 + cross join file('2.tsv', TSV, 'b int') t2" +1 2 +``` + Now let’s output memory user for each Unix user: ``` bash -$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' | clickhouse-local -S "user String, mem Float64" -q "SELECT user, round(sum(mem), 2) as memTotal FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" +$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ + | clickhouse-local --structure "user String, mem Float64" \ + --query "SELECT user, round(sum(mem), 2) as memTotal + FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" ``` ``` text diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 5593221f74c..0774f2176e8 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -503,3 +503,34 @@ Supported modifiers for Format: | %% | a % sign | % | [Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) + +## FROM_UNIXTIME + +When there is only a single argument of integer type, it acts in the same way as `toDateTime` and returns the [DateTime](../../sql-reference/data-types/datetime.md) type. + +For example: + +```sql +SELECT FROM_UNIXTIME(423543535) +``` + +```text +┌─FROM_UNIXTIME(423543535)─┐ +│ 1983-06-04 10:58:55 │ +└──────────────────────────┘ +``` + +When there are two arguments, the first is an integer or DateTime and the second is a constant format string, it acts in the same way as `formatDateTime` and returns the `String` type.
+ +For example: + +```sql +SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime +``` + +```text +┌─DateTime────────────┐ +│ 2009-02-11 14:42:23 │ +└─────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index f6a831ae080..798f0eb3739 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1351,6 +1351,44 @@ len: 30 - [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) +## randomFixedString {#randomfixedstring} + +Generates a binary string of the specified length filled with random bytes (including zero bytes). + +**Syntax** + +``` sql +randomFixedString(length); +``` + +**Parameters** + +- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). + +**Returned value(s)** + +- String filled with random bytes. + +Type: [FixedString](../../sql-reference/data-types/fixedstring.md). + +**Example** + +Query: + +```sql +SELECT randomFixedString(13) as rnd, toTypeName(rnd) +``` + +Result: + +```text +┌─rnd──────┬─toTypeName(randomFixedString(13))─┐ +│ j▒h㋖HɨZ'▒ │ FixedString(13) │ +└──────────┴───────────────────────────────────┘ + +``` + + ## randomStringUTF8 {#randomstringutf8} Generates a random string of a specified length. Result string contains valid UTF-8 code points. The value of code points may be outside of the range of assigned Unicode. diff --git a/docs/en/sql-reference/statements/attach.md b/docs/en/sql-reference/statements/attach.md index 480ecd88c52..6260e15cfa9 100644 --- a/docs/en/sql-reference/statements/attach.md +++ b/docs/en/sql-reference/statements/attach.md @@ -1,5 +1,6 @@ --- toc_priority: 42 +toc_title: ATTACH --- # ATTACH Statement {#attach} diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index a65e35d1bca..cfb18161d64 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -1,5 +1,6 @@ --- toc_priority: 43 +toc_title: CHECK --- # CHECK TABLE Statement {#check-table} diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md index fe63e39606e..b7ffec1db07 100644 --- a/docs/en/sql-reference/statements/describe-table.md +++ b/docs/en/sql-reference/statements/describe-table.md @@ -1,5 +1,6 @@ --- toc_priority: 44 +toc_title: DESCRIBE --- # DESCRIBE TABLE Statement {#misc-describe-table} diff --git a/docs/en/sql-reference/statements/detach.md b/docs/en/sql-reference/statements/detach.md index 42677079a8b..0d236c132df 100644 --- a/docs/en/sql-reference/statements/detach.md +++ b/docs/en/sql-reference/statements/detach.md @@ -1,5 +1,6 @@ --- toc_priority: 45 +toc_title: DETACH --- # DETACH Statement {#detach} @@ -11,6 +12,5 @@ DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` This does not delete the table’s data or metadata. On the next server launch, the server will read the metadata and find out about the table again. -Similarly, a “detached” table can be re-attached using the `ATTACH` query (with the exception of system tables, which do not have metadata stored for them). -There is no `DETACH DATABASE` query. +Similarly, a “detached” table can be re-attached using the `ATTACH` query (with the exception of system tables, which do not have metadata stored for them). 
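The DETACH / ATTACH round-trip described in the detach.md change above works like this in practice (a minimal sketch; the table name `hits` is hypothetical):

``` sql
-- Make the server forget about the table; data and metadata stay on disk.
DETACH TABLE hits;

-- Read the metadata back from disk and make the table visible again.
ATTACH TABLE hits;
```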
diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index 582e24d4824..de4eda64ef6 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -1,5 +1,6 @@ --- toc_priority: 46 +toc_title: DROP --- # DROP Statements {#drop} diff --git a/docs/en/sql-reference/statements/exists.md b/docs/en/sql-reference/statements/exists.md index fb7ada666e2..2e8072125c7 100644 --- a/docs/en/sql-reference/statements/exists.md +++ b/docs/en/sql-reference/statements/exists.md @@ -1,5 +1,6 @@ --- toc_priority: 47 +toc_title: EXISTS --- # EXISTS Statement {#exists-statement} diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index 747c1c5d624..1e2d2e925f7 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -1,8 +1,9 @@ --- toc_priority: 48 +toc_title: KILL --- -## KILL Statements {#kill-statements} +# KILL Statements {#kill-statements} There are two kinds of kill statements: to kill a query and to kill a mutation diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 9d5587267a8..0f834f30db1 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -1,5 +1,6 @@ --- toc_priority: 49 +toc_title: OPTIMIZE --- # OPTIMIZE Statement {#misc_operations-optimize} diff --git a/docs/en/sql-reference/statements/rename.md b/docs/en/sql-reference/statements/rename.md index be7d7b1ecf6..315aaac40b5 100644 --- a/docs/en/sql-reference/statements/rename.md +++ b/docs/en/sql-reference/statements/rename.md @@ -1,5 +1,6 @@ --- toc_priority: 50 +toc_title: RENAME --- # RENAME Statement {#misc_operations-rename} diff --git a/docs/en/sql-reference/statements/set-role.md b/docs/en/sql-reference/statements/set-role.md index e9e13f8ba5b..564c8ec859f 100644 --- a/docs/en/sql-reference/statements/set-role.md +++ b/docs/en/sql-reference/statements/set-role.md @@ -1,5 +1,6 @@ --- toc_priority: 52 +toc_title: SET ROLE --- # SET ROLE Statement {#set-role-statement} diff --git a/docs/en/sql-reference/statements/set.md b/docs/en/sql-reference/statements/set.md index 95f6ee18895..4a5bbf0baf6 100644 --- a/docs/en/sql-reference/statements/set.md +++ b/docs/en/sql-reference/statements/set.md @@ -1,5 +1,6 @@ --- toc_priority: 51 +toc_title: SET --- # SET Statement {#query-set} diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 547d0751959..7a7f6ba44bb 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -5,6 +5,8 @@ toc_title: SYSTEM # SYSTEM Statements {#query-language-system} +The list of available `SYSTEM` statements: + - [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries) - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries) - [RELOAD DICTIONARY](#query_language-system-reload-dictionary) @@ -36,7 +38,7 @@ toc_title: SYSTEM - [RESTART REPLICA](#query_language-system-restart-replica) - [RESTART REPLICAS](#query_language-system-restart-replicas) -## RELOAD EMBEDDED DICTIONARIES\] {#query_language-system-reload-emdedded-dictionaries} +## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries} Reload all [Internal dictionaries](../../sql-reference/dictionaries/internal-dicts.md). By default, internal dictionaries are disabled. 
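For context on the `SYSTEM` statement renames that follow, the reload statements are invoked like this (a minimal sketch; `my_dict` is a hypothetical dictionary name):

``` sql
-- Reload every external dictionary that was already loaded successfully.
SYSTEM RELOAD DICTIONARIES;

-- Fully reload one dictionary, regardless of its state (LOADED / NOT_LOADED / FAILED).
SYSTEM RELOAD DICTIONARY my_dict;
```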
@@ -48,7 +50,7 @@ Reloads all dictionaries that have been successfully loaded before. By default, dictionaries are loaded lazily (see [dictionaries\_lazy\_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED). Always returns `Ok.` regardless of the result of the dictionary update. -## RELOAD DICTIONARY Dictionary\_name {#query_language-system-reload-dictionary} +## RELOAD DICTIONARY {#query_language-system-reload-dictionary} Completely reloads a dictionary `dictionary_name`, regardless of the state of the dictionary (LOADED / NOT\_LOADED / FAILED). Always returns `Ok.` regardless of the result of updating the dictionary. diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md index e259a106f82..a8cf802616f 100644 --- a/docs/en/sql-reference/statements/truncate.md +++ b/docs/en/sql-reference/statements/truncate.md @@ -1,5 +1,6 @@ --- toc_priority: 53 +toc_title: TRUNCATE --- # TRUNCATE Statement {#truncate-statement} diff --git a/docs/en/sql-reference/statements/use.md b/docs/en/sql-reference/statements/use.md index 16a36e581d9..2932542f052 100644 --- a/docs/en/sql-reference/statements/use.md +++ b/docs/en/sql-reference/statements/use.md @@ -1,5 +1,6 @@ --- toc_priority: 54 +toc_title: USE --- # USE Statement {#use} diff --git a/docs/ru/operations/system-tables.md b/docs/ru/operations/system-tables.md index 6e7ad1ebd5b..d11f4bcd3a0 100644 --- a/docs/ru/operations/system-tables.md +++ b/docs/ru/operations/system-tables.md @@ -46,6 +46,41 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 - [system.events](#system_tables-events) — таблица с количеством произошедших событий. - [system.metric\_log](#system_tables-metric_log) — таблица фиксирующая историю значений метрик из `system.metrics` и `system.events`. +## system.asynchronous\_metric\_log {#system-tables-async-log} + +Содержит исторические значения метрик из таблицы `system.asynchronous_metrics`, которые сохраняются раз в минуту. По умолчанию включена. + +Столбцы: +- `event_date` ([Date](../sql-reference/data-types/date.md)) — дата события. +- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — время события. +- `name` ([String](../sql-reference/data-types/string.md)) — название метрики. +- `value` ([Float64](../sql-reference/data-types/float.md)) — значение метрики. 
+ +**Пример** + +``` sql +SELECT * FROM system.asynchronous_metric_log LIMIT 10 +``` + +``` text +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬────value─┐ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pmuzzy │ 0 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pdirty │ 4214 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.retained │ 17657856 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.mapped │ 71471104 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.resident │ 61538304 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.metadata │ 6199264 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.allocated │ 38074336 │ +│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.epoch │ 2 │ +└────────────┴─────────────────────┴──────────────────────────────────────────┴──────────┘ +``` + +**Смотрите также** +- [system.asynchronous_metrics](#system_tables-asynchronous_metrics) — Содержит метрики, которые периодически вычисляются в фоновом режиме. +- [system.metric_log](#system_tables-metric_log) — таблица, фиксирующая историю значений метрик из `system.metrics` и `system.events`. + ## system.clusters {#system-clusters} Содержит информацию о доступных в конфигурационном файле кластерах и серверах, которые в них входят. diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index ad14d665620..f477482428d 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -1334,9 +1334,46 @@ len: 30 - [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) +## randomFixedString {#randomfixedstring} + +Генерирует бинарную строку заданной длины, заполненную случайными байтами, включая нулевые. + +**Синтаксис** + +``` sql +randomFixedString(length); +``` + +**Параметры** + +- `length` — Длина строки в байтах. [UInt64](../../sql-reference/data-types/int-uint.md). + +**Возвращаемое значение** + +- Строка, заполненная случайными байтами. + +Тип: [FixedString](../../sql-reference/data-types/fixedstring.md). + +**Пример** + +Запрос: + +```sql +SELECT randomFixedString(13) as rnd, toTypeName(rnd) +``` + +Результат: + +```text +┌─rnd──────┬─toTypeName(randomFixedString(13))─┐ +│ j▒h㋖HɨZ'▒ │ FixedString(13) │ +└──────────┴───────────────────────────────────┘ + +``` + ## randomStringUTF8 {#randomstringutf8} -Генерирует строку определенной длины со случайной строкой в кодировке UTF-8. +Генерирует строку заданной длины со случайными символами в кодировке UTF-8.
**Синтаксис** diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 62ab3f5ccdf..d87375acc6a 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -35,4 +35,4 @@ soupsieve==2.0.1 termcolor==1.1.0 tornado==5.1.1 Unidecode==1.1.1 -urllib3==1.25.9 +urllib3==1.25.10 diff --git a/docs/tools/translate/requirements.txt b/docs/tools/translate/requirements.txt index 70b0ab8fdcb..1bbd119b823 100644 --- a/docs/tools/translate/requirements.txt +++ b/docs/tools/translate/requirements.txt @@ -9,4 +9,4 @@ python-slugify==4.0.1 PyYAML==5.3.1 requests==2.24.0 text-unidecode==1.3 -urllib3==1.25.9 +urllib3==1.25.10 diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 26b5e66cc8a..676fd65831d 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -37,7 +37,7 @@ ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] 使用指定的`name`, `type`, [`codec`](../../sql-reference/statements/create.md#codecs) 以及 `default_expr` (请参见 [Default expressions](../../sql-reference/statements/create.md#create-default-values)),往表中增加新的列。 -如果sql中包含 `IF NOT EXISTS` ,执行语句时如果列已经存在,CH不会报错。如果指定`AFTER name_after`(表中另一个列的名称),则新的列会加在指定列的后面。否则,新的列将被添加到表的末尾。注意,不能讲新的列添加到表的开始位置, `name_after` 可以是执行该动作时已经在表中存在的任意列。 +如果sql中包含 `IF NOT EXISTS` ,执行语句时如果列已经存在,CH不会报错。如果指定`AFTER name_after`(表中另一个列的名称),则新的列会加在指定列的后面。否则,新的列将被添加到表的末尾。注意,不能将新的列添加到表的开始位置, `name_after` 可以是执行该动作时已经在表中存在的任意列。 添加列仅仅是改变原有表的结构不会对已有数据产生影响。执行完 `ALTER`后磁盘中也不会出现新的数据。如果查询表时列的数据为空,那么CH会使用列的默认值来进行填充(如果有默认表达式,则使用这个;或者用0或空字符串)。当数据块完成合并(参见[MergeTree](../../engines/table-engines/mergetree-family/mergetree.md))后,磁盘中会出现该列的数据。 @@ -166,7 +166,7 @@ MODIFY ORDER BY new_expression 该操作仅支持 [`MergeTree`](../../engines/table-engines/mergetree-family/mergetree.md) 系列表 (含 [replicated](../../engines/table-engines/mergetree-family/replication.md) 表)。它会将表的 [排序键](../../engines/table-engines/mergetree-family/mergetree.md)变成 `new_expression` (元组表达式)。主键仍保持不变。 -该操作时轻量级的,仅会改变元数据。 +该操作是轻量级的,仅会改变元数据。 ### 跳过索引来更改数据 {#manipulations-with-data-skipping-indices} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d375b75fcc7..281a0150625 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -323,7 +323,8 @@ struct Settings : public SettingsCollection M(SettingLogsLevel, send_logs_level, LogsLevel::fatal, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \ M(SettingBool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.", 0) \ M(SettingBool, enable_optimize_predicate_expression_to_final_subquery, 1, "Allow push predicate to final subquery.", 0) \ - \ + M(SettingBool, allow_push_predicate_when_subquery_contains_with, 1, "Allows push predicate when subquery contains WITH clause", 0) \ +\ M(SettingUInt64, low_cardinality_max_dictionary_size, 8192, "Maximum size (in rows) of shared global dictionary for LowCardinality type.", 0) \ M(SettingBool, low_cardinality_use_single_dictionary_for_part, false, "LowCardinality type serialization setting. If is true, than will use additional keys when global dictionary overflows. 
Otherwise, will create several shared dictionaries.", 0) \ M(SettingBool, decimal_check_overflow, true, "Check overflow of decimal arithmetic/comparison operations", 0) \ diff --git a/src/DataStreams/ParallelParsingBlockInputStream.cpp b/src/DataStreams/ParallelParsingBlockInputStream.cpp index 536b7bd77f4..f89a6f030c0 100644 --- a/src/DataStreams/ParallelParsingBlockInputStream.cpp +++ b/src/DataStreams/ParallelParsingBlockInputStream.cpp @@ -1,11 +1,108 @@ #include +#include +#include +#include +#include namespace DB { -void ParallelParsingBlockInputStream::segmentatorThreadFunction() +ParallelParsingBlockInputStream::ParallelParsingBlockInputStream(const Params & params) + : header(params.input_creator_params.sample), + row_input_format_params(params.input_creator_params.row_input_format_params), + format_settings(params.input_creator_params.settings), + input_processor_creator(params.input_processor_creator), + min_chunk_bytes(params.min_chunk_bytes), + original_buffer(params.read_buffer), + // Subtract one thread that we use for segmentation and one for + // reading. After that, must have at least two threads left for + // parsing. See the assertion below. + pool(std::max(2, params.max_threads - 2)), + file_segmentation_engine(params.file_segmentation_engine) { + // See comment above. + assert(params.max_threads >= 4); + + // One unit for each thread, including segmentator and reader, plus a + // couple more units so that the segmentation thread doesn't spuriously + // bump into reader thread on wraparound. + processing_units.resize(params.max_threads + 2); + + segmentator_thread = ThreadFromGlobalPool( + &ParallelParsingBlockInputStream::segmentatorThreadFunction, this, CurrentThread::getGroup()); +} + +ParallelParsingBlockInputStream::~ParallelParsingBlockInputStream() +{ + finishAndWait(); +} + +void ParallelParsingBlockInputStream::cancel(bool kill) +{ + /** + * Can be called multiple times, from different threads. Saturate the + * kill flag with OR. + */ + if (kill) + is_killed = true; + is_cancelled = true; + + /* + * The format parsers themselves are not being cancelled here, so we'll + * have to wait until they process the current block. Given that the + * chunk size is on the order of megabytes, this shouldn't be too long. + * We can't call IInputFormat->cancel here, because the parser object is + * local to the parser thread, and we don't want to introduce any + * synchronization between parser threads and the other threads to get + * better performance. An ideal solution would be to add a callback to + * IInputFormat that checks whether it was cancelled. + */ + + finishAndWait(); +} + +void ParallelParsingBlockInputStream::scheduleParserThreadForUnitWithNumber(size_t ticket_number) +{ + pool.scheduleOrThrowOnError([this, ticket_number, group = CurrentThread::getGroup()]() + { + parserThreadFunction(group, ticket_number); + }); +} + +void ParallelParsingBlockInputStream::finishAndWait() +{ + finished = true; + + { + std::unique_lock lock(mutex); + segmentator_condvar.notify_all(); + reader_condvar.notify_all(); + } + + if (segmentator_thread.joinable()) + segmentator_thread.join(); + + try + { + pool.wait(); + } + catch (...)
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void ParallelParsingBlockInputStream::segmentatorThreadFunction(ThreadGroupStatusPtr thread_group) +{ + SCOPE_EXIT( + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + ); + if (thread_group) + CurrentThread::attachTo(thread_group); + setThreadName("Segmentator"); + try { while (!finished) @@ -49,12 +146,19 @@ void ParallelParsingBlockInputStream::segmentatorThreadFunction() } } -void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_ticket_number) +void ParallelParsingBlockInputStream::parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number) { + SCOPE_EXIT( + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + ); + if (thread_group) + CurrentThread::attachTo(thread_group); + + setThreadName("ChunkParser"); + try { - setThreadName("ChunkParser"); - const auto current_unit_number = current_ticket_number % processing_units.size(); auto & unit = processing_units[current_unit_number]; diff --git a/src/DataStreams/ParallelParsingBlockInputStream.h b/src/DataStreams/ParallelParsingBlockInputStream.h index a904c686e47..4c110f8a937 100644 --- a/src/DataStreams/ParallelParsingBlockInputStream.h +++ b/src/DataStreams/ParallelParsingBlockInputStream.h @@ -3,15 +3,14 @@ #include #include #include -#include -#include -#include #include #include namespace DB { +class ReadBuffer; + /** * ORDER-PRESERVING parallel parsing of data formats. * It splits original data into chunks. Then each chunk is parsed by different thread. @@ -74,68 +73,16 @@ public: size_t min_chunk_bytes; }; - explicit ParallelParsingBlockInputStream(const Params & params) - : header(params.input_creator_params.sample), - row_input_format_params(params.input_creator_params.row_input_format_params), - format_settings(params.input_creator_params.settings), - input_processor_creator(params.input_processor_creator), - min_chunk_bytes(params.min_chunk_bytes), - original_buffer(params.read_buffer), - // Subtract one thread that we use for segmentation and one for - // reading. After that, must have at least two threads left for - // parsing. See the assertion below. - pool(std::max(2, params.max_threads - 2)), - file_segmentation_engine(params.file_segmentation_engine) - { - // See comment above. - assert(params.max_threads >= 4); - - // One unit for each thread, including segmentator and reader, plus a - // couple more units so that the segmentation thread doesn't spuriously - // bump into reader thread on wraparound. - processing_units.resize(params.max_threads + 2); - - segmentator_thread = ThreadFromGlobalPool([this] { segmentatorThreadFunction(); }); - } + explicit ParallelParsingBlockInputStream(const Params & params); + ~ParallelParsingBlockInputStream() override; String getName() const override { return "ParallelParsing"; } + Block getHeader() const override { return header; } - ~ParallelParsingBlockInputStream() override - { - finishAndWait(); - } - - void cancel(bool kill) override - { - /** - * Can be called multiple times, from different threads. Saturate the - * the kill flag with OR. - */ - if (kill) - is_killed = true; - is_cancelled = true; - - /* - * The format parsers themselves are not being cancelled here, so we'll - * have to wait until they process the current block. Given that the - * chunk size is on the order of megabytes, this should't be too long. 
- * We can't call IInputFormat->cancel here, because the parser object is - * local to the parser thread, and we don't want to introduce any - * synchronization between parser threads and the other threads to get - * better performance. An ideal solution would be to add a callback to - * IInputFormat that checks whether it was cancelled. - */ - - finishAndWait(); - } - - Block getHeader() const override - { - return header; - } + void cancel(bool kill) override; protected: - //Reader routine + // Reader routine Block readImpl() override; const BlockMissingValues & getMissingValues() const override @@ -212,36 +159,11 @@ private: std::deque processing_units; - void scheduleParserThreadForUnitWithNumber(size_t ticket_number) - { - pool.scheduleOrThrowOnError(std::bind(&ParallelParsingBlockInputStream::parserThreadFunction, this, ticket_number)); - } + void scheduleParserThreadForUnitWithNumber(size_t ticket_number); + void finishAndWait(); - void finishAndWait() - { - finished = true; - - { - std::unique_lock lock(mutex); - segmentator_condvar.notify_all(); - reader_condvar.notify_all(); - } - - if (segmentator_thread.joinable()) - segmentator_thread.join(); - - try - { - pool.wait(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - - void segmentatorThreadFunction(); - void parserThreadFunction(size_t current_ticket_number); + void segmentatorThreadFunction(ThreadGroupStatusPtr thread_group); + void parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number); // Save/log a background exception, set termination flag, wake up all // threads. This function is used by segmentator and parsed threads. diff --git a/src/Functions/DummyJSONParser.h b/src/Functions/DummyJSONParser.h index 7bdcf205926..4f4facba957 100644 --- a/src/Functions/DummyJSONParser.h +++ b/src/Functions/DummyJSONParser.h @@ -37,8 +37,8 @@ struct DummyJSONParser double getDouble() const { return 0; } bool getBool() const { return false; } std::string_view getString() const { return {}; } - Array getArray() const; - Object getObject() const; + Array getArray() const { return {}; } + Object getObject() const { return {}; } }; /// References an array in a JSON document. 
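For context on the `joinGet` rework that follows: the function reads a value from a Join-engine table by key, roughly as in this sketch (the table and column names are hypothetical, following the pattern in the function's documentation):

``` sql
-- A Join-engine table acts as a persistent, queryable hash map.
CREATE TABLE id_val (id UInt32, val String) ENGINE = Join(ANY, LEFT, id);
INSERT INTO id_val VALUES (1, 'one'), (2, 'two');

-- joinGet(table, returned column, key): look up 'val' for key 1.
SELECT joinGet('id_val', 'val', toUInt32(1));
```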
diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index 3a8264fb542..a33b70684a5 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -1,10 +1,10 @@ #include -#include #include #include #include #include +#include #include @@ -16,35 +16,19 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -template -void ExecutableFunctionJoinGet::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t) -{ - Block keys; - for (size_t i = 2; i < arguments.size(); ++i) - { - auto key = block.getByPosition(arguments[i]); - keys.insert(std::move(key)); - } - block.getByPosition(result) = join->joinGet(keys, result_block); -} - -template -ExecutableFunctionImplPtr FunctionJoinGet::prepare(const Block &, const ColumnNumbers &, size_t) const -{ - return std::make_unique>(join, Block{{return_type->createColumn(), return_type, attr_name}}); -} - static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & context) { + if (arguments.size() != 3) + throw Exception{"Function joinGet takes 3 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + String join_name; if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) { join_name = name_col->getValue(); } else - throw Exception( - "Illegal type " + arguments[0].type->getName() + " of first argument of function joinGet, expected a const string.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function joinGet, expected a const string.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; size_t dot = join_name.find('.'); String database_name; @@ -59,12 +43,10 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co ++dot; } String table_name = join_name.substr(dot); - if (table_name.empty()) - throw Exception("joinGet does not allow empty table name", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); auto table = DatabaseCatalog::instance().getTable({database_name, table_name}, context); auto storage_join = std::dynamic_pointer_cast(table); if (!storage_join) - throw Exception("Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception{"Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; String attr_name; if (const auto * name_col = checkAndGetColumnConst(arguments[1].column.get())) @@ -72,9 +54,9 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co attr_name = name_col->getValue(); } else - throw Exception( - "Illegal type " + arguments[1].type->getName() + " of second argument of function joinGet, expected a const string.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception{"Illegal type " + arguments[1].type->getName() + + " of second argument of function joinGet, expected a const string.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; return std::make_pair(storage_join, attr_name); } @@ -83,22 +65,44 @@ FunctionBaseImplPtr JoinGetOverloadResolver::build(const ColumnsWithTyp { auto [storage_join, attr_name] = getJoin(arguments, context); auto join = storage_join->getJoin(); - DataTypes data_types(arguments.size() - 2); - for (size_t i = 2; i < arguments.size(); ++i) - data_types[i - 2] = arguments[i].type; - auto return_type = join->joinGetCheckAndGetReturnType(data_types, attr_name, or_null); + DataTypes data_types(arguments.size()); + auto table_lock = 
storage_join->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); + for (size_t i = 0; i < arguments.size(); ++i) + data_types[i] = arguments[i].type; + + auto return_type = join->joinGetReturnType(attr_name, or_null); return std::make_unique>(table_lock, storage_join, join, attr_name, data_types, return_type); } template -void JoinGetOverloadResolver::checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const +DataTypePtr JoinGetOverloadResolver::getReturnType(const ColumnsWithTypeAndName & arguments) const { - if (number_of_arguments < 3) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(number_of_arguments) - + ", should be greater or equal to 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + auto [storage_join, attr_name] = getJoin(arguments, context); + auto join = storage_join->getJoin(); + return join->joinGetReturnType(attr_name, or_null); +} + + +template +void ExecutableFunctionJoinGet::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) +{ + auto ctn = block.getByPosition(arguments[2]); + if (isColumnConst(*ctn.column)) + ctn.column = ctn.column->cloneResized(1); + ctn.name = ""; // make sure the key name never collides with the join columns + Block key_block = {ctn}; + join->joinGet(key_block, attr_name, or_null); + auto & result_ctn = key_block.getByPosition(1); + if (isColumnConst(*ctn.column)) + result_ctn.column = ColumnConst::create(result_ctn.column, input_rows_count); + block.getByPosition(result) = result_ctn; +} + +template +ExecutableFunctionImplPtr FunctionJoinGet::prepare(const Block &, const ColumnNumbers &, size_t) const +{ + return std::make_unique>(join, attr_name); } void registerFunctionJoinGet(FunctionFactory & factory) diff --git a/src/Functions/FunctionJoinGet.h b/src/Functions/FunctionJoinGet.h index 2057f05496a..a82da589960 100644 --- a/src/Functions/FunctionJoinGet.h +++ b/src/Functions/FunctionJoinGet.h @@ -13,14 +13,14 @@ template class ExecutableFunctionJoinGet final : public IExecutableFunctionImpl { public: - ExecutableFunctionJoinGet(HashJoinPtr join_, const Block & result_block_) - : join(std::move(join_)), result_block(result_block_) {} + ExecutableFunctionJoinGet(HashJoinPtr join_, String attr_name_) + : join(std::move(join_)), attr_name(std::move(attr_name_)) {} static constexpr auto name = or_null ?
"joinGetOrNull" : "joinGet"; bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; @@ -28,7 +28,7 @@ public: private: HashJoinPtr join; - Block result_block; + const String attr_name; }; template @@ -77,15 +77,13 @@ public: String getName() const override { return name; } FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override; - DataTypePtr getReturnType(const ColumnsWithTypeAndName &) const override { return {}; } // Not used + DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override; bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return true; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1}; } - void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override; private: const Context & context; diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index 82afaad4c27..4920de9ad42 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -221,11 +221,6 @@ class DefaultOverloadResolver : public IFunctionOverloadResolverImpl public: explicit DefaultOverloadResolver(std::shared_ptr function_) : function(std::move(function_)) {} - void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override - { - return function->checkNumberOfArgumentsIfVariadic(number_of_arguments); - } - bool isDeterministic() const override { return function->isDeterministic(); } bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } bool isInjective(const Block &block) const override { return function->isInjective(block); } diff --git a/src/Functions/IFunctionImpl.h b/src/Functions/IFunctionImpl.h index 52ec60608b6..2148c9da0fb 100644 --- a/src/Functions/IFunctionImpl.h +++ b/src/Functions/IFunctionImpl.h @@ -156,12 +156,6 @@ public: virtual bool isStateful() const { return false; } virtual bool isVariadic() const { return false; } - /// Will be called if isVariadic returns true. You need to check if function can have specified number of arguments. 
- virtual void checkNumberOfArgumentsIfVariadic(size_t /*number_of_arguments*/) const - { - throw Exception("checkNumberOfArgumentsIfVariadic is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - virtual void getLambdaArgumentTypes(DataTypes & /*arguments*/) const { throw Exception("Function " + getName() + " can't have lambda-expressions as arguments", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -289,11 +283,6 @@ public: virtual bool isVariadic() const { return false; } - virtual void checkNumberOfArgumentsIfVariadic(size_t /*number_of_arguments*/) const - { - throw Exception("checkNumberOfArgumentsIfVariadic is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); - } - virtual void getLambdaArgumentTypes(DataTypes & /*arguments*/) const { throw Exception("Function " + getName() + " can't have lambda-expressions as arguments", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index f84a9aa561e..58e441bd42b 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -4,10 +4,12 @@ #include #include -#include -#include -#include #include +#include +#include +#include +#include +#include #include #include @@ -34,12 +36,20 @@ namespace ErrorCodes namespace { // in private namespace to avoid GCC 9 error: "explicit specialization in non-namespace scope" -template struct ActionaValueTypeMap {}; -template <> struct ActionaValueTypeMap { using ActionValueType = UInt16; }; -template <> struct ActionaValueTypeMap { using ActionValueType = UInt32; }; +template struct ActionValueTypeMap {}; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt16; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; // TODO(vnemkov): once there is support for Int64 in LUT, make that Int64. // TODO(vnemkov): to add sub-second format instruction, make that DateTime64 and do some math in Action. -template <> struct ActionaValueTypeMap { using ActionValueType = UInt32; }; +template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; } /** formatDateTime(time, 'pattern') @@ -80,10 +90,25 @@ template <> struct ActionaValueTypeMap { using ActionValueTy * * PS. We can make this function to return FixedString. Currently it returns String. */ -class FunctionFormatDateTime : public IFunction +template +class FunctionFormatDateTimeImpl : public IFunction { private: /// Time is either UInt32 for DateTime or UInt16 for Date. 
+ template + static bool castType(const IDataType * type, F && f) + { + return castTypeToEither< + DataTypeInt8, + DataTypeUInt8, + DataTypeInt16, + DataTypeUInt16, + DataTypeInt32, + DataTypeUInt32, + DataTypeInt64, + DataTypeUInt64>(type, std::forward(f)); + } + template class Action { @@ -251,9 +276,9 @@ }; public: - static constexpr auto name = "formatDateTime"; + static constexpr auto name = Name::name; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { @@ -269,37 +294,103 @@ DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 2 && arguments.size() != 3) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - if (!WhichDataType(arguments[0].type).isDateOrDateTime()) - throw Exception("Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + - ". Should be a date or a date with time", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (!WhichDataType(arguments[1].type).isString()) - throw Exception("Illegal type " + arguments[1].type->getName() + " of 2 argument of function " + getName() + ". Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (arguments.size() == 3) + if constexpr (support_integer) { - if (!WhichDataType(arguments[2].type).isString()) - throw Exception("Illegal type " + arguments[2].type->getName() + " of 3 argument of function " + getName() + ". Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (arguments.size() != 1 && arguments.size() != 2 && arguments.size() != 3) + throw Exception( + "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + + ", should be 1, 2 or 3", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (arguments.size() == 1 && !isInteger(arguments[0].type)) + throw Exception( + "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + + " when arguments size is 1. Should be an integer", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (arguments.size() > 1 && !(isInteger(arguments[0].type) || WhichDataType(arguments[0].type).isDateOrDateTime())) + throw Exception( + "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + + " when arguments size is 2 or 3. Should be an integer or a date with time", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + else + { + if (arguments.size() != 2 && arguments.size() != 3) + throw Exception( + "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + + ", should be 2 or 3", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (!WhichDataType(arguments[0].type).isDateOrDateTime()) + throw Exception( + "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + + ". Should be a date or a date with time", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + if (arguments.size() == 2 && !WhichDataType(arguments[1].type).isString()) + throw Exception( + "Illegal type " + arguments[1].type->getName() + " of 2 argument of function " + getName() + ". 
Must be String.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (arguments.size() == 3 && !WhichDataType(arguments[2].type).isString()) + throw Exception( + "Illegal type " + arguments[2].type->getName() + " of 3 argument of function " + getName() + ". Must be String.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (arguments.size() == 1) + return std::make_shared(); return std::make_shared(); } - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) const override + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, [[maybe_unused]] size_t input_rows_count) const override { - if (!executeType(block, arguments, result) - && !executeType(block, arguments, result) - && !executeType(block, arguments, result)) - throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() - + " of function " + getName() + ", must be Date or DateTime", + if constexpr (support_integer) + { + if (arguments.size() == 1) + { + if (!castType(block.getByPosition(arguments[0]).type.get(), [&](const auto & type) + { + using FromDataType = std::decay_t; + ConvertImpl::execute(block, arguments, result, input_rows_count); + return true; + })) + { + throw Exception( + "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of function " + getName() + + ", must be Integer or DateTime when arguments size is 1.", + ErrorCodes::ILLEGAL_COLUMN); + } + } + else + { + if (!castType(block.getByPosition(arguments[0]).type.get(), [&](const auto & type) + { + using FromDataType = std::decay_t; + if (!executeType(block, arguments, result)) + throw Exception( + "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of function " + getName() + + ", must be Integer or DateTime.", + ErrorCodes::ILLEGAL_COLUMN); + return true; + })) + { + if (!executeType(block, arguments, result) && !executeType(block, arguments, result) + && !executeType(block, arguments, result)) + throw Exception( + "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of function " + getName() + + ", must be Integer or DateTime.", ErrorCodes::ILLEGAL_COLUMN); + } + } + } + else + { + if (!executeType(block, arguments, result) && !executeType(block, arguments, result) + && !executeType(block, arguments, result)) + throw Exception( + "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of function " + getName() + + ", must be Date or DateTime.", + ErrorCodes::ILLEGAL_COLUMN); + } } template @@ -318,13 +409,17 @@ public: String pattern = pattern_column->getValue(); - using T = typename ActionaValueTypeMap::ActionValueType; + using T = typename ActionValueTypeMap::ActionValueType; std::vector> instructions; String pattern_to_fill = parsePattern(pattern, instructions); size_t result_size = pattern_to_fill.size(); const DateLUTImpl * time_zone_tmp = nullptr; - if (std::is_same_v || std::is_same_v) + if (castType(block.getByPosition(arguments[0]).type.get(), [&]([[maybe_unused]] const auto & type) { return true; })) + { + time_zone_tmp = &extractTimeZoneFromFunctionArguments(block, arguments, 2, 0); + } + else if (std::is_same_v || std::is_same_v) time_zone_tmp = &extractTimeZoneFromFunctionArguments(block, arguments, 2, 0); else time_zone_tmp = &DateLUT::instance(); @@ -606,9 +701,23 @@ public: } }; +struct NameFormatDateTime +{ + static constexpr auto name = "formatDateTime"; +}; + +struct NameFromUnixTime +{ + static constexpr auto name = "FROM_UNIXTIME"; +}; + +using 
+using FunctionFormatDateTime = FunctionFormatDateTimeImpl<NameFormatDateTime, false>;
+using FunctionFROM_UNIXTIME = FunctionFormatDateTimeImpl<NameFromUnixTime, true>;
+
 void registerFunctionFormatDateTime(FunctionFactory & factory)
 {
     factory.registerFunction<FunctionFormatDateTime>();
+    factory.registerFunction<FunctionFROM_UNIXTIME>();
 }
 
 }
diff --git a/src/Functions/randConstant.cpp b/src/Functions/randConstant.cpp
index ebf2f752b66..ac410fc35c4 100644
--- a/src/Functions/randConstant.cpp
+++ b/src/Functions/randConstant.cpp
@@ -76,20 +76,20 @@ public:
     bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
 
-    void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override
-    {
-        if (number_of_arguments > 1)
-            throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
-                + toString(number_of_arguments) + ", should be 0 or 1.",
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-    }
-
     static FunctionOverloadResolverImplPtr create(const Context &) { return std::make_unique<RandomConstantOverloadResolver<ToType, Name>>(); }
 
-    DataTypePtr getReturnType(const DataTypes &) const override { return std::make_shared<DataTypeNumber<ToType>>(); }
+    DataTypePtr getReturnType(const DataTypes & data_types) const override
+    {
+        size_t number_of_arguments = data_types.size();
+        if (number_of_arguments > 1)
+            throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+                + toString(number_of_arguments) + ", should be 0 or 1.",
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+        return std::make_shared<DataTypeNumber<ToType>>();
+    }
 
     FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
     {
diff --git a/src/Interpreters/AnyInputOptimize.cpp b/src/Interpreters/AnyInputOptimize.cpp
deleted file mode 100644
index 1b31ea4024b..00000000000
--- a/src/Interpreters/AnyInputOptimize.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-    extern const int ILLEGAL_AGGREGATION;
-}
-
-namespace
-{
-    constexpr auto * any = "any";
-    constexpr auto * anyLast = "anyLast";
-}
-
-ASTPtr * getExactChild(const ASTPtr & ast, const size_t ind)
-{
-    if (ast && ast->as<ASTFunction>()->arguments->children[ind])
-        return &ast->as<ASTFunction>()->arguments->children[ind];
-    return nullptr;
-}
-
-///recursive searching of identifiers
-void changeAllIdentifiers(ASTPtr & ast, size_t ind, const std::string & name)
-{
-    ASTPtr * exact_child = getExactChild(ast, ind);
-    if (!exact_child)
-        return;
-
-    if ((*exact_child)->as<ASTIdentifier>())
-    {
-        ///put new any
-        ASTPtr old_ast = *exact_child;
-        *exact_child = makeASTFunction(name);
-        (*exact_child)->as<ASTFunction>()->arguments->children.push_back(old_ast);
-    }
-    else if ((*exact_child)->as<ASTFunction>())
-    {
-        if (AggregateFunctionFactory::instance().isAggregateFunctionName((*exact_child)->as<ASTFunction>()->name))
-            throw Exception("Aggregate function " + (*exact_child)->as<ASTFunction>()->name +
-                " is found inside aggregate function " + name + " in query", ErrorCodes::ILLEGAL_AGGREGATION);
-
-        for (size_t i = 0; i < (*exact_child)->as<ASTFunction>()->arguments->children.size(); i++)
-            changeAllIdentifiers(*exact_child, i, name);
-    }
-}
-
-
-///cut old any, put any to identifiers.
any(functions(x)) -> functions(any(x)) -void AnyInputMatcher::visit(ASTPtr & current_ast, Data data) -{ - data = {}; - if (!current_ast) - return; - - auto * function_node = current_ast->as(); - if (!function_node || function_node->arguments->children.empty()) - return; - - const auto & function_argument = function_node->arguments->children[0]; - if ((function_node->name == any || function_node->name == anyLast) - && function_argument && function_argument->as()) - { - auto name = function_node->name; - auto alias = function_node->alias; - - ///cut any or anyLast - if (!function_argument->as()->arguments->children.empty()) - { - current_ast = function_argument->clone(); - current_ast->setAlias(alias); - for (size_t i = 0; i < current_ast->as()->arguments->children.size(); ++i) - changeAllIdentifiers(current_ast, i, name); - } - } -} - -bool AnyInputMatcher::needChildVisit(const ASTPtr & node, const ASTPtr & child) -{ - if (!child) - throw Exception("AST item should not have nullptr in children", ErrorCodes::LOGICAL_ERROR); - - if (node->as() || node->as()) - return false; // NOLINT - - return true; -} - -} diff --git a/src/Interpreters/AnyInputOptimize.h b/src/Interpreters/AnyInputOptimize.h deleted file mode 100644 index 6e782578e35..00000000000 --- a/src/Interpreters/AnyInputOptimize.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -///This optimiser is similar to ArithmeticOperationsInAgrFunc optimizer, but for function any we can extract any functions. -class AnyInputMatcher -{ -public: - struct Data {}; - - static void visit(ASTPtr & ast, Data data); - static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); -}; -using AnyInputVisitor = InDepthNodeVisitor; -} diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp index c67986d97c0..3ee7b59197a 100644 --- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp +++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp @@ -153,7 +153,9 @@ void ArithmeticOperationsInAgrFuncMatcher::visit(ASTPtr & ast, Data & data) bool ArithmeticOperationsInAgrFuncMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) { - return !node->as() && !node->as(); + return !node->as() && + !node->as() && + !node->as(); } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index bcbde3ef6af..98816a4eab9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1563,7 +1563,7 @@ void Context::reloadClusterConfig() return; } - /// Clusters config has been suddenly changed, recompute clusters + // Clusters config has been suddenly changed, recompute clusters } } } diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index ffc806b9e88..27294a57675 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -42,7 +42,6 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int SET_SIZE_LIMIT_EXCEEDED; extern const int TYPE_MISMATCH; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } namespace @@ -1110,34 +1109,27 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) block = block.cloneWithColumns(std::move(dst_columns)); } +static void checkTypeOfKey(const Block & block_left, const Block & block_right) +{ + const auto & [c1, left_type_origin, left_name] = block_left.safeGetByPosition(0); + const auto & [c2, right_type_origin, right_name] = block_right.safeGetByPosition(0); + auto 
left_type = removeNullable(left_type_origin); + auto right_type = removeNullable(right_type_origin); -DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const + if (!left_type->equals(*right_type)) + throw Exception("Type mismatch of columns to joinGet by: " + + left_name + " " + left_type->getName() + " at left, " + + right_name + " " + right_type->getName() + " at right", + ErrorCodes::TYPE_MISMATCH); +} + + +DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null) const { std::shared_lock lock(data->rwlock); - size_t num_keys = data_types.size(); - if (right_table_keys.columns() != num_keys) - throw Exception( - "Number of arguments for function joinGet" + toString(or_null ? "OrNull" : "") - + " doesn't match: passed, should be equal to " + toString(num_keys), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - for (size_t i = 0; i < num_keys; ++i) - { - const auto & left_type_origin = data_types[i]; - const auto & [c2, right_type_origin, right_name] = right_table_keys.safeGetByPosition(i); - auto left_type = removeNullable(left_type_origin); - auto right_type = removeNullable(right_type_origin); - if (!left_type->equals(*right_type)) - throw Exception( - "Type mismatch in joinGet key " + toString(i) + ": found type " + left_type->getName() + ", while the needed type is " - + right_type->getName(), - ErrorCodes::TYPE_MISMATCH); - } - if (!sample_block_with_columns_to_add.has(column_name)) throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - auto elem = sample_block_with_columns_to_add.getByName(column_name); if (or_null) elem.type = makeNullable(elem.type); @@ -1146,33 +1138,34 @@ DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, template -ColumnWithTypeAndName HashJoin::joinGetImpl(const Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const +void HashJoin::joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const { - // Assemble the key block with correct names. 
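// Note (editorial annotation, not part of the patch): with the single-key
// checkTypeOfKey() added above, key types are compared after removeNullable(),
// so a Nullable probe column can still look up a non-Nullable join key.
// Illustrative query, adapted from the updated 01080_join_get_null test:
//     SELECT joinGet(test_join_joinGet, 'name', toNullable(toInt32(2)));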
- Block keys; - for (size_t i = 0; i < block.columns(); ++i) - { - auto key = block.getByPosition(i); - key.name = key_names_right[i]; - keys.insert(std::move(key)); - } - - joinBlockImpl( - keys, key_names_right, block_with_columns_to_add, maps_); - return keys.getByPosition(keys.columns() - 1); + joinBlockImpl( + block, {block.getByPosition(0).name}, block_with_columns_to_add, maps_); } +// TODO: support composite key // TODO: return multiple columns as named tuple // TODO: return array of values when strictness == ASTTableJoin::Strictness::All -ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block_with_columns_to_add) const +void HashJoin::joinGet(Block & block, const String & column_name, bool or_null) const { std::shared_lock lock(data->rwlock); + if (key_names_right.size() != 1) + throw Exception("joinGet only supports StorageJoin containing exactly one key", ErrorCodes::UNSUPPORTED_JOIN_KEYS); + + checkTypeOfKey(block, right_table_keys); + + auto elem = sample_block_with_columns_to_add.getByName(column_name); + if (or_null) + elem.type = makeNullable(elem.type); + elem.column = elem.type->createColumn(); + if ((strictness == ASTTableJoin::Strictness::Any || strictness == ASTTableJoin::Strictness::RightAny) && kind == ASTTableJoin::Kind::Left) { - return joinGetImpl(block, block_with_columns_to_add, std::get(data->maps)); + joinGetImpl(block, {elem}, std::get(data->maps)); } else throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN); diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 025f41ac28f..67d83d27a6d 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -162,11 +162,11 @@ public: */ void joinBlock(Block & block, ExtraBlockPtr & not_processed) override; - /// Check joinGet arguments and infer the return type. - DataTypePtr joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const; + /// Infer the return type for joinGet function + DataTypePtr joinGetReturnType(const String & column_name, bool or_null) const; - /// Used by joinGet function that turns StorageJoin into a dictionary. - ColumnWithTypeAndName joinGet(const Block & block, const Block & block_with_columns_to_add) const; + /// Used by joinGet function that turns StorageJoin into a dictionary + void joinGet(Block & block, const String & column_name, bool or_null) const; /** Keep "totals" (separate part of dataset, see WITH TOTALS) to use later. 
*/ @@ -383,7 +383,7 @@ private: void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const; template - ColumnWithTypeAndName joinGetImpl(const Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const; + void joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const; static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); }; diff --git a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 432a7f40b12..051341964a2 100644 --- a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -16,6 +16,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED; extern const int LOGICAL_ERROR; } @@ -150,6 +151,12 @@ private: { if (node.name == "in" || node.name == "notIn") { + if (node.arguments->children.size() != 2) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function '{}' expects two arguments, given: '{}'", + node.name, node.formatForErrorMessage()); + } auto & subquery = node.arguments->children.at(1); std::vector renamed; NonGlobalTableVisitor::Data table_data{data.checker, data.context, renamed, &node, nullptr}; diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index fea0228e3fe..3915a0f7f43 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -21,6 +21,7 @@ PredicateExpressionsOptimizer::PredicateExpressionsOptimizer( const Context & context_, const TablesWithColumns & tables_with_columns_, const Settings & settings) : enable_optimize_predicate_expression(settings.enable_optimize_predicate_expression) , enable_optimize_predicate_expression_to_final_subquery(settings.enable_optimize_predicate_expression_to_final_subquery) + , allow_push_predicate_when_subquery_contains_with(settings.allow_push_predicate_when_subquery_contains_with) , context(context_) , tables_with_columns(tables_with_columns_) { @@ -151,7 +152,8 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTable(ASTPtr & table_e if (!table_predicates.empty()) { auto optimize_final = enable_optimize_predicate_expression_to_final_subquery; - PredicateRewriteVisitor::Data data(context, table_predicates, std::move(table_columns), optimize_final); + auto optimize_with = allow_push_predicate_when_subquery_contains_with; + PredicateRewriteVisitor::Data data(context, table_predicates, std::move(table_columns), optimize_final, optimize_with); PredicateRewriteVisitor(data).visit(table_element); return data.is_rewrite; diff --git a/src/Interpreters/PredicateExpressionsOptimizer.h b/src/Interpreters/PredicateExpressionsOptimizer.h index f555c68020e..8cceda93164 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/src/Interpreters/PredicateExpressionsOptimizer.h @@ -25,6 +25,7 @@ public: private: const bool enable_optimize_predicate_expression; const bool enable_optimize_predicate_expression_to_final_subquery; + const bool allow_push_predicate_when_subquery_contains_with; const Context & context; const TablesWithColumns & tables_with_columns; diff --git a/src/Interpreters/PredicateRewriteVisitor.cpp b/src/Interpreters/PredicateRewriteVisitor.cpp index 2a4bd4c1fd2..9795675bcc8 100644 --- a/src/Interpreters/PredicateRewriteVisitor.cpp +++ b/src/Interpreters/PredicateRewriteVisitor.cpp @@ -17,8 +17,8 @@ namespace 
DB { PredicateRewriteVisitorData::PredicateRewriteVisitorData( - const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_) - : context(context_), predicates(predicates_), column_names(column_names_), optimize_final(optimize_final_) + const Context & context_, const ASTs & predicates_, Names && column_names_, bool optimize_final_, bool optimize_with_) + : context(context_), predicates(predicates_), column_names(column_names_), optimize_final(optimize_final_), optimize_with(optimize_with_) { } @@ -85,7 +85,8 @@ static void cleanAliasAndCollectIdentifiers(ASTPtr & predicate, std::vector +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +bool extractIdentifiers(const ASTFunction & func, std::vector & identifiers) +{ + for (auto & arg : func.arguments->children) + { + if (const auto * arg_func = arg->as()) + { + if (arg_func->name == "lambda") + return false; + + if (AggregateFunctionFactory::instance().isAggregateFunctionName(arg_func->name)) + return false; + + if (!extractIdentifiers(*arg_func, identifiers)) + return false; + } + else if (arg->as()) + identifiers.emplace_back(&arg); + } + + return true; +} + +} + + +void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * func = ast->as()) + visit(*func, ast, data); +} + +void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data) +{ + if (func.arguments->children.empty() || !func.arguments->children[0]) + return; + + if (func.name != "any" && func.name != "anyLast") + return; + + auto & func_arguments = func.arguments->children; + + const auto * first_arg_func = func_arguments[0]->as(); + if (!first_arg_func || first_arg_func->arguments->children.empty()) + return; + + /// We have rewritten this function. Just unwrap its argument. + if (data.rewritten.count(ast.get())) + { + func_arguments[0]->setAlias(func.alias); + ast = func_arguments[0]; + return; + } + + std::vector identifiers; + if (!extractIdentifiers(func, identifiers)) + return; + + /// Wrap identifiers: any(f(x, y, g(z))) -> any(f(any(x), any(y), g(any(z)))) + for (auto * ast_to_change : identifiers) + { + ASTPtr identifier_ast = *ast_to_change; + *ast_to_change = makeASTFunction(func.name); + (*ast_to_change)->as()->arguments->children.emplace_back(identifier_ast); + } + + data.rewritten.insert(ast.get()); + + /// Unwrap function: any(f(any(x), any(y), g(any(z)))) -> f(any(x), any(y), g(any(z))) + func_arguments[0]->setAlias(func.alias); + ast = func_arguments[0]; +} + +bool RewriteAnyFunctionMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) +{ + return !node->as() && + !node->as() && + !node->as(); +} + +} diff --git a/src/Interpreters/RewriteAnyFunctionVisitor.h b/src/Interpreters/RewriteAnyFunctionVisitor.h new file mode 100644 index 00000000000..d29af322711 --- /dev/null +++ b/src/Interpreters/RewriteAnyFunctionVisitor.h @@ -0,0 +1,29 @@ +#pragma once + +#include + +#include +#include + +namespace DB +{ + +class ASTFunction; + +/// Rewrite 'any' and 'anyLast' functions pushing them inside original function. 
+/// any(f(x, y, g(z))) -> f(any(x), any(y), g(any(z))) +class RewriteAnyFunctionMatcher +{ +public: + struct Data + { + std::unordered_set rewritten; + }; + + static void visit(ASTPtr & ast, Data & data); + static void visit(const ASTFunction &, ASTPtr & ast, Data & data); + static bool needChildVisit(const ASTPtr & node, const ASTPtr & child); +}; +using RewriteAnyFunctionVisitor = InDepthNodeVisitor; + +} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 8dc941439fb..3fdefb3d773 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -459,11 +459,10 @@ void optimizeAggregationFunctions(ASTPtr & query) ArithmeticOperationsInAgrFuncVisitor(data).visit(query); } -void optimizeAnyInput(ASTPtr & query) +void optimizeAnyFunctions(ASTPtr & query) { - /// Removing arithmetic operations from functions - AnyInputVisitor::Data data = {}; - AnyInputVisitor(data).visit(query); + RewriteAnyFunctionVisitor::Data data = {}; + RewriteAnyFunctionVisitor(data).visit(query); } void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, const Context & context) @@ -521,9 +520,9 @@ void TreeOptimizer::apply(ASTPtr & query, Aliases & aliases, const NameSet & sou if (settings.optimize_group_by_function_keys) optimizeGroupByFunctionKeys(select_query); - ///Move all operations out of any function + /// Move all operations out of any function if (settings.optimize_move_functions_out_of_any) - optimizeAnyInput(query); + optimizeAnyFunctions(query); /// Remove injective functions inside uniq if (settings.optimize_injective_functions_inside_uniq) diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index cae2691ca1f..094dfbbbb81 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -28,7 +28,7 @@ inline bool functionIsLikeOperator(const std::string & name) inline bool functionIsJoinGet(const std::string & name) { - return startsWith(name, "joinGet"); + return name == "joinGet" || startsWith(name, "dictGet"); } inline bool functionIsDictGet(const std::string & name) diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 48be2fae1b9..c571b857365 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -20,7 +20,6 @@ SRCS( addTypeConversionToAST.cpp AggregateDescription.cpp Aggregator.cpp - AnyInputOptimize.cpp ArithmeticOperationsInAgrFuncOptimize.cpp ArithmeticOperationsInAgrFuncOptimize.h ArrayJoinAction.cpp @@ -127,6 +126,7 @@ SRCS( ReplaceQueryParameterVisitor.cpp RequiredSourceColumnsData.cpp RequiredSourceColumnsVisitor.cpp + RewriteAnyFunctionVisitor.cpp RowRefs.cpp Set.cpp SetVariants.cpp diff --git a/src/Processors/Sources/SourceFromInputStream.h b/src/Processors/Sources/SourceFromInputStream.h index 88a045e65a2..9d0acb74943 100644 --- a/src/Processors/Sources/SourceFromInputStream.h +++ b/src/Processors/Sources/SourceFromInputStream.h @@ -12,6 +12,7 @@ using BlockInputStreamPtr = std::shared_ptr; class SourceFromInputStream : public ISourceWithProgress { public: + /// If force_add_aggregating_info is enabled, AggregatedChunkInfo (with bucket number and is_overflows flag) will be added to result chunk. 
explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false); String getName() const override { return "SourceFromInputStream"; } diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index fcc72b1717f..8aebb09b8f4 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -506,7 +506,6 @@ MergeTreeRangeReader::MergeTreeRangeReader( if (prewhere->alias_actions) prewhere->alias_actions->execute(sample_block, true); - sample_block_before_prewhere = sample_block; if (prewhere->prewhere_actions) prewhere->prewhere_actions->execute(sample_block, true); @@ -826,7 +825,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r for (auto name_and_type = header.begin(); pos < num_columns; ++pos, ++name_and_type) block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); - if (prewhere && prewhere->alias_actions) + if (prewhere->alias_actions) prewhere->alias_actions->execute(block); /// Columns might be projected out. We need to store them here so that default columns can be evaluated later. diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 0af19e70c2b..381b87ecffd 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -222,7 +222,6 @@ private: Stream stream; Block sample_block; - Block sample_block_before_prewhere; bool last_reader_in_chain = false; bool is_initialized = false; diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index f0965e4d2c5..5bd1c0620dd 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -37,4 +37,11 @@ list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC}) set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) add_library(clickhouse_storages_system ${storages_system_headers} ${storages_system_sources}) -target_link_libraries(clickhouse_storages_system PRIVATE dbms common string_utils clickhouse_common_zookeeper clickhouse_parsers) +target_link_libraries(clickhouse_storages_system PRIVATE + dbms + common + string_utils + clickhouse_common_zookeeper + clickhouse_parsers + Poco::JSON +) diff --git a/tests/integration/test_input_format_parallel_parsing_memory_tracking/__init__.py b/tests/integration/test_input_format_parallel_parsing_memory_tracking/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/conf.xml b/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/conf.xml new file mode 100644 index 00000000000..26cb2bfa782 --- /dev/null +++ b/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/conf.xml @@ -0,0 +1,4 @@ + + + 3000000000 + diff --git a/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py b/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py new file mode 100644 index 00000000000..06a1a72162d --- /dev/null +++ b/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py @@ -0,0 +1,33 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name +# pylint: disable=line-too-long + +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +instance = 
cluster.add_instance('instance', main_configs=['configs/conf.xml'])
+
+@pytest.fixture(scope='module', autouse=True)
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+# max_memory_usage_for_user cannot be used, since the memory for the user is
+# accounted correctly; only the total is not.
+def test_memory_tracking_total():
+    instance.query('''
+        CREATE TABLE null (row String) ENGINE=Null;
+    ''')
+    instance.exec_in_container(['bash', '-c',
+        'clickhouse client -q "SELECT arrayStringConcat(arrayMap(x->toString(cityHash64(x)), range(1000)), \' \') from numbers(10000)" > data.json'])
+    for it in range(0, 20):
+        # The problem can be triggered only via HTTP,
+        # since clickhouse-client parses the data by itself.
+        assert instance.exec_in_container(['curl', '--silent', '--show-error', '--data-binary', '@data.json',
+            'http://127.1:8123/?query=INSERT%20INTO%20null%20FORMAT%20TSV']) == '', 'Failed on {} iteration'.format(it)
diff --git a/tests/queries/0_stateless/01080_join_get_null.reference b/tests/queries/0_stateless/01080_join_get_null.reference
index 0cfbf08886f..bfde072a796 100644
--- a/tests/queries/0_stateless/01080_join_get_null.reference
+++ b/tests/queries/0_stateless/01080_join_get_null.reference
@@ -1 +1 @@
-2
+2 2
diff --git a/tests/queries/0_stateless/01080_join_get_null.sql b/tests/queries/0_stateless/01080_join_get_null.sql
index 9f782452d34..71e7ddf8e75 100644
--- a/tests/queries/0_stateless/01080_join_get_null.sql
+++ b/tests/queries/0_stateless/01080_join_get_null.sql
@@ -1,12 +1,12 @@
 DROP TABLE IF EXISTS test_joinGet;
+DROP TABLE IF EXISTS test_join_joinGet;
 
-CREATE TABLE test_joinGet(user_id Nullable(Int32), name String) Engine = Join(ANY, LEFT, user_id);
+CREATE TABLE test_joinGet(id Int32, user_id Nullable(Int32)) Engine = Memory();
+CREATE TABLE test_join_joinGet(user_id Int32, name String) Engine = Join(ANY, LEFT, user_id);
 
-INSERT INTO test_joinGet VALUES (2, 'a'), (6, 'b'), (10, 'c'), (null, 'd');
+INSERT INTO test_join_joinGet VALUES (2, 'a'), (6, 'b'), (10, 'c');
 
-SELECT toNullable(toInt32(2)) user_id WHERE joinGet(test_joinGet, 'name', user_id) != '';
-
--- If the JOIN keys are Nullable fields, the rows where at least one of the keys has the value NULL are not joined.
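-- Note (editorial annotation, not part of the patch): the deleted lines above
-- relied on joinGet accepting a Nullable Join key; the rewritten test keeps the
-- Nullable probe value but joins against a non-Nullable key. joinGet now also
-- requires exactly one join key: the multi-key form exercised by the
-- 01400_join_get_with_multi_keys test (deleted further down) throws
-- UNSUPPORTED_JOIN_KEYS after this patch. A minimal sketch of the supported form:
--     CREATE TABLE j (k Int32, v String) ENGINE = Join(ANY, LEFT, k);
--     INSERT INTO j VALUES (2, 'a');
--     SELECT joinGet(j, 'v', toInt32(2));  -- returns 'a'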
-SELECT cast(null AS Nullable(Int32)) user_id WHERE joinGet(test_joinGet, 'name', user_id) != ''; +SELECT 2 id, toNullable(toInt32(2)) user_id WHERE joinGet(test_join_joinGet, 'name', user_id) != ''; DROP TABLE test_joinGet; +DROP TABLE test_join_joinGet; diff --git a/tests/queries/0_stateless/01322_any_input_optimize.reference b/tests/queries/0_stateless/01322_any_input_optimize.reference index 209a4afb3a2..4b6fbfd32c0 100644 --- a/tests/queries/0_stateless/01322_any_input_optimize.reference +++ b/tests/queries/0_stateless/01322_any_input_optimize.reference @@ -1,3 +1,30 @@ -9 SELECT any(number) + (any(number) * 2) -FROM numbers(3, 10) +FROM numbers(1, 2) +3 +SELECT anyLast(number) + (anyLast(number) * 2) +FROM numbers(1, 2) +6 +WITH any(number) * 3 AS x +SELECT x +FROM numbers(1, 2) +3 +SELECT + anyLast(number) * 3 AS x, + x +FROM numbers(1, 2) +6 6 +SELECT any(number + (number * 2)) +FROM numbers(1, 2) +3 +SELECT anyLast(number + (number * 2)) +FROM numbers(1, 2) +6 +WITH any(number * 3) AS x +SELECT x +FROM numbers(1, 2) +3 +SELECT + anyLast(number * 3) AS x, + x +FROM numbers(1, 2) +6 6 diff --git a/tests/queries/0_stateless/01322_any_input_optimize.sql b/tests/queries/0_stateless/01322_any_input_optimize.sql index 65f09d65738..4b8a55d4c7b 100644 --- a/tests/queries/0_stateless/01322_any_input_optimize.sql +++ b/tests/queries/0_stateless/01322_any_input_optimize.sql @@ -1,4 +1,32 @@ -SET optimize_move_functions_out_of_any=1; -SET enable_debug_queries=1; -SELECT any(number + number * 2) FROM numbers(3, 10); -ANALYZE SELECT any(number + number * 2) FROM numbers(3, 10); +SET enable_debug_queries = 1; +SET optimize_move_functions_out_of_any = 1; + +ANALYZE SELECT any(number + number * 2) FROM numbers(1, 2); +SELECT any(number + number * 2) FROM numbers(1, 2); + +ANALYZE SELECT anyLast(number + number * 2) FROM numbers(1, 2); +SELECT anyLast(number + number * 2) FROM numbers(1, 2); + +ANALYZE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); +WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); + +ANALYZE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); +SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); + +SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 } + +SET optimize_move_functions_out_of_any = 0; + +ANALYZE SELECT any(number + number * 2) FROM numbers(1, 2); +SELECT any(number + number * 2) FROM numbers(1, 2); + +ANALYZE SELECT anyLast(number + number * 2) FROM numbers(1, 2); +SELECT anyLast(number + number * 2) FROM numbers(1, 2); + +ANALYZE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); +WITH any(number * 3) AS x SELECT x FROM numbers(1, 2); + +ANALYZE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); +SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2); + +SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 } diff --git a/tests/queries/0_stateless/01400_join_get_with_multi_keys.reference b/tests/queries/0_stateless/01400_join_get_with_multi_keys.reference deleted file mode 100644 index 49d59571fbf..00000000000 --- a/tests/queries/0_stateless/01400_join_get_with_multi_keys.reference +++ /dev/null @@ -1 +0,0 @@ -0.1 diff --git a/tests/queries/0_stateless/01400_join_get_with_multi_keys.sql b/tests/queries/0_stateless/01400_join_get_with_multi_keys.sql deleted file mode 100644 index 73068270762..00000000000 --- a/tests/queries/0_stateless/01400_join_get_with_multi_keys.sql +++ /dev/null @@ -1,9 +0,0 @@ -DROP TABLE IF EXISTS test_joinGet; - -CREATE TABLE test_joinGet(a String, b String, c Float64) ENGINE = Join(any, left, 
a, b); - -INSERT INTO test_joinGet VALUES ('ab', '1', 0.1), ('ab', '2', 0.2), ('cd', '3', 0.3); - -SELECT joinGet(test_joinGet, 'c', 'ab', '1'); - -DROP TABLE test_joinGet; diff --git a/tests/queries/0_stateless/01411_from_unixtime.reference b/tests/queries/0_stateless/01411_from_unixtime.reference new file mode 100644 index 00000000000..8541fa3f1a0 --- /dev/null +++ b/tests/queries/0_stateless/01411_from_unixtime.reference @@ -0,0 +1,29 @@ +1970-01-01 00:02:03 +1973-11-29 21:33:09 +2038-07-12 01:15:36 +19 +11 +1970-01-15 +1970-01-15 06:52:36 +20 +02 +01/02/18 + 2 +2018-01-02 +22 +02 +10 +11 +12 +001 +366 +01 +33 +\n +AM +AM +PM +22:33 +44 +\t +22:33:44 diff --git a/tests/queries/0_stateless/01411_from_unixtime.sql b/tests/queries/0_stateless/01411_from_unixtime.sql new file mode 100644 index 00000000000..ec7b4d65b57 --- /dev/null +++ b/tests/queries/0_stateless/01411_from_unixtime.sql @@ -0,0 +1,29 @@ +SELECT formatDateTime(FROM_UNIXTIME(123), '%Y-%m-%d %R:%S', 'UTC'); +SELECT formatDateTime(FROM_UNIXTIME(123456789), '%Y-%m-%d %R:%S', 'UTC'); +SELECT formatDateTime(FROM_UNIXTIME(6457477432), '%Y-%m-%d %R:%S', 'UTC'); +SELECT FROM_UNIXTIME(5345345, '%C', 'UTC'); +SELECT FROM_UNIXTIME(645123, '%H', 'UTC'); +SELECT FROM_UNIXTIME(1232456, '%Y-%m-%d', 'UTC'); +SELECT FROM_UNIXTIME(1234356, '%Y-%m-%d %R:%S', 'UTC'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%C'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%d'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%D'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%e'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%F'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%H'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 02:33:44'), '%H'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%I'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 11:33:44'), '%I'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 00:33:44'), '%I'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-01 00:33:44'), '%j'); +SELECT FROM_UNIXTIME(toDateTime('2000-12-31 00:33:44'), '%j'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%m'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%M'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%n'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 00:33:44'), '%p'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 11:33:44'), '%p'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 12:33:44'), '%p'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%R'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%S'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%t'); +SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%T'); diff --git a/tests/queries/0_stateless/01412_row_from_totals.reference b/tests/queries/0_stateless/01412_row_from_totals.reference new file mode 100644 index 00000000000..f13c9bf5487 --- /dev/null +++ b/tests/queries/0_stateless/01412_row_from_totals.reference @@ -0,0 +1,4 @@ +2020-07-10 +2020-07-11 + +0000-00-00 diff --git a/tests/queries/0_stateless/01412_row_from_totals.sql b/tests/queries/0_stateless/01412_row_from_totals.sql new file mode 100644 index 00000000000..170eb06721a --- /dev/null +++ b/tests/queries/0_stateless/01412_row_from_totals.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS tracking_events_tmp; +DROP TABLE IF EXISTS open_events_tmp; + +CREATE TABLE tracking_events_tmp (`APIKey` UInt32, `EventDate` Date) ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (APIKey, EventDate); 
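-- Note (editorial annotation, not part of the patch): this test pins down the
-- handling of rows that originate from a TOTALS block. In the reference output
-- above, the trailing '0000-00-00' line is the totals row of the right-hand
-- subquery; its key column carries the default Date value through the FULL
-- OUTER JOIN. A minimal sketch of that convention, independent of these tables:
--     SELECT number % 2 AS k, count() FROM numbers(4) GROUP BY k WITH TOTALS;
--     -- two groups, then a totals row where k is 0 (the type's default value)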
+CREATE TABLE open_events_tmp (`APIKey` UInt32, `EventDate` Date) ENGINE = MergeTree PARTITION BY toMonday(EventDate) ORDER BY (APIKey, EventDate);
+
+insert into open_events_tmp select 2, '2020-07-10' from numbers(32);
+insert into open_events_tmp select 2, '2020-07-11' from numbers(31);
+
+insert into tracking_events_tmp select 2, '2020-07-10' from numbers(1881);
+insert into tracking_events_tmp select 2, '2020-07-11' from numbers(1623);
+
+SELECT EventDate
+FROM
+(
+    SELECT EventDate
+    FROM tracking_events_tmp AS t1
+    WHERE (EventDate >= toDate('2020-07-10')) AND (EventDate <= toDate('2020-07-11')) AND (APIKey = 2)
+    GROUP BY EventDate
+)
+FULL OUTER JOIN
+(
+    SELECT EventDate
+    FROM remote('127.0.0.{1,3}', currentDatabase(), open_events_tmp) AS t2
+    WHERE (EventDate >= toDate('2020-07-10')) AND (EventDate <= toDate('2020-07-11')) AND (APIKey = 2)
+    GROUP BY EventDate
+    WITH TOTALS
+) USING EventDate
+ORDER BY EventDate
+settings totals_mode = 'after_having_auto', group_by_overflow_mode = 'any', max_rows_to_group_by = 10000000, joined_subquery_requires_alias=0;
+
+
+DROP TABLE IF EXISTS tracking_events_tmp;
+DROP TABLE IF EXISTS open_events_tmp;
diff --git a/tests/queries/0_stateless/01414_freeze_does_not_prevent_alters.reference b/tests/queries/0_stateless/01414_freeze_does_not_prevent_alters.reference
new file mode 100644
index 00000000000..bd952f321d6
--- /dev/null
+++ b/tests/queries/0_stateless/01414_freeze_does_not_prevent_alters.reference
@@ -0,0 +1,26 @@
+1 hello
+2 world
+all_1_1_0 0
+---
+all_1_1_0 1
+---
+1 goodbye
+2 world
+all_1_1_0 1
+all_1_1_0_2 0
+---
+1 goodbye
+2 world
+all_1_1_0 1
+all_1_1_0_2 0
+all_1_1_0_3 0
+---
+all_1_1_0 1
+all_1_1_0_2 0
+all_1_1_0_3 1
+---
+1 hello
+2 world
+all_1_1_0 1
+all_1_1_0_2 0
+all_1_1_0_3 1
diff --git a/tests/queries/0_stateless/01414_freeze_does_not_prevent_alters.sql b/tests/queries/0_stateless/01414_freeze_does_not_prevent_alters.sql
new file mode 100644
index 00000000000..ad201022412
--- /dev/null
+++ b/tests/queries/0_stateless/01414_freeze_does_not_prevent_alters.sql
@@ -0,0 +1,35 @@
+-- In previous ClickHouse versions, parts were not 100% immutable and FREEZE may prevent subsequent ALTERs.
+-- It's no longer the case. Let's prove it.
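-- Note (editorial annotation, not part of the patch): FREEZE hardlinks part
-- files into the table's shadow/ directory and flips is_frozen in system.parts;
-- the checks below observe that flag, e.g.:
--     SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't';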
+ +DROP TABLE IF EXISTS t; +CREATE TABLE t (k UInt64, s String) ENGINE = MergeTree ORDER BY k; +INSERT INTO t VALUES (1, 'hello'), (2, 'world'); + +SELECT * FROM t; +SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't'; + +SELECT '---'; +ALTER TABLE t FREEZE; +SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't'; + +SELECT '---'; +SET mutations_sync = 1; +ALTER TABLE t UPDATE s = 'goodbye' WHERE k = 1; +SELECT * FROM t; +SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't'; + +SELECT '---'; +ALTER TABLE t MODIFY COLUMN s Enum('goodbye' = 1, 'world' = 2); +SELECT * FROM t; +SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't'; + +SELECT '---'; +ALTER TABLE t FREEZE; +SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't'; + +SELECT '---'; +ALTER TABLE t MODIFY COLUMN s Enum('hello' = 1, 'world' = 2); +SELECT * FROM t; +SELECT name, is_frozen FROM system.parts WHERE database = currentDatabase() AND table = 't'; + +DROP TABLE t; diff --git a/tests/queries/0_stateless/01414_optimize_any_bug.reference b/tests/queries/0_stateless/01414_optimize_any_bug.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01414_optimize_any_bug.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01414_optimize_any_bug.sql b/tests/queries/0_stateless/01414_optimize_any_bug.sql new file mode 100644 index 00000000000..6f6f291c504 --- /dev/null +++ b/tests/queries/0_stateless/01414_optimize_any_bug.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test +( + `Source.C1` Array(UInt64), + `Source.C2` Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY tuple(); + +SET optimize_move_functions_out_of_any = 1; + +SELECT any(arrayFilter((c, d) -> (4 = d), `Source.C1`, `Source.C2`)[1]) AS x +FROM test +WHERE 0 +GROUP BY 42; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/01414_push_predicate_when_contains_with_clause.reference b/tests/queries/0_stateless/01414_push_predicate_when_contains_with_clause.reference new file mode 100644 index 00000000000..a2ee0336191 --- /dev/null +++ b/tests/queries/0_stateless/01414_push_predicate_when_contains_with_clause.reference @@ -0,0 +1,15 @@ +999 1998 +999 1998 +SELECT + number, + square_number +FROM +( + WITH number * 2 AS square_number + SELECT + number, + square_number + FROM numbers_indexed + WHERE number = 999 +) AS squares +WHERE number = 999 diff --git a/tests/queries/0_stateless/01414_push_predicate_when_contains_with_clause.sql b/tests/queries/0_stateless/01414_push_predicate_when_contains_with_clause.sql new file mode 100644 index 00000000000..cf3307205e0 --- /dev/null +++ b/tests/queries/0_stateless/01414_push_predicate_when_contains_with_clause.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS numbers_indexed; +DROP TABLE IF EXISTS squares; + +CREATE TABLE numbers_indexed Engine=MergeTree ORDER BY number PARTITION BY bitShiftRight(number,8) SETTINGS index_granularity=8 AS SELECT * FROM numbers(16384); + +CREATE VIEW squares AS WITH number*2 AS square_number SELECT number, square_number FROM numbers_indexed; + +SET max_rows_to_read=8, read_overflow_mode='throw'; + +WITH number * 2 AS square_number SELECT number, square_number FROM numbers_indexed WHERE number = 999; + +SELECT * FROM squares WHERE number = 999; + +EXPLAIN SYNTAX SELECT number, square_number FROM ( WITH number * 2 AS square_number SELECT 
number, square_number FROM numbers_indexed) AS squares WHERE number = 999; + +DROP TABLE IF EXISTS squares; +DROP TABLE IF EXISTS numbers_indexed; diff --git a/tests/stress b/tests/stress index 4a6ad411298..1aad49250c2 100755 --- a/tests/stress +++ b/tests/stress @@ -6,7 +6,7 @@ trap 'kill -9 $(jobs -p)' EXIT function thread() { while true; do - ./clickhouse-test --order random 2>&1 | awk '/^\w+:/ { printf("\033[0;%s%sm \033[0m", ('$1' % 2 ? "4" : "10"), (int('$1' / 2) % 8)) }' + ./clickhouse-test --client-option="query-fuzzer-runs=10" --order random 2>&1 | awk '/^\w+:/ { printf("\033[0;%s%sm \033[0m", ('$1' % 2 ? "4" : "10"), (int('$1' / 2) % 8)) }' done }