diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index cbd3bd7bec4..c52a58eac8a 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -683,3 +683,4 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + python3 merge_pr.py diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index a513eb9216d..d69020d810e 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -169,3 +169,4 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + python3 merge_pr.py --check-approved diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index aecf3799a5d..c677ec4bf5c 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -4388,3 +4388,4 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + python3 merge_pr.py --check-approved diff --git a/contrib/poco b/contrib/poco index 79923422618..0ab9bba7cca 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 799234226187c0ae0b8c90f23465b25ed7956e56 +Subproject commit 0ab9bba7ccad3c8dacce04a35cb3b78218547ab4 diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 2582b599d58..3458cf905da 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -5,6 +5,7 @@ set -x # core.COMM.PID-TID sysctl kernel.core_pattern='core.%e.%p-%P' +dmesg --clear ||: set -e set -u @@ -368,6 +369,7 @@ if [ -f core.zst ]; then fi rg --text -F '' server.log > fatal.log ||: +dmesg -T > dmesg.log ||: zstd --threads=0 server.log @@ -396,6 +398,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s fuzzer.log server.log.zst main.log + dmesg.log ${CORE_LINK}

diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 2165045e565..ee4b5d7c156 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -128,6 +128,7 @@ function run_tests() if [[ "${HIGH_LEVEL_COVERAGE}" = "YES" ]]; then ADDITIONAL_OPTIONS+=('--report-coverage') + ADDITIONAL_OPTIONS+=('--report-logs-stats') fi set +e diff --git a/docker/test/stress/stress b/docker/test/stress/stress index cf92b86c18f..4afd2745526 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -289,6 +289,7 @@ if __name__ == "__main__": "--database=system", "--hung-check", "--stress", + "--report-logs-stats", "00001_select_1", ] ) diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md index ac19794c167..939995a61c5 100644 --- a/docs/en/engines/database-engines/postgresql.md +++ b/docs/en/engines/database-engines/postgresql.md @@ -136,3 +136,7 @@ DESCRIBE TABLE test_database.test_table; │ data │ Nullable(String) │ └────────┴───────────────────┘ ``` + +## Related content + +- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 7f9659400b8..b73d28c8508 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -175,3 +175,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) - [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md) - [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) + +## Related content +- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index e28c486afca..d384ed639eb 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1203,12 +1203,14 @@ SELECT * FROM json_each_row_nested - [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. - [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. - [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. +- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`. +- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`. 
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. - [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. - [output_format_json_quote_decimals](/docs/en/operations/settings/settings.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. - [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. -- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `false`. +- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `true`. - [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. - [output_format_json_validate_utf8](/docs/en/operations/settings/settings.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 376b7480358..7a6b2340d29 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -266,7 +266,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This setting applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 4ffe2bbc7c4..9def33debbd 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -402,40 +402,62 @@ Default value: `ALL`. ## join_algorithm {#settings-join_algorithm} -Specifies [JOIN](../../sql-reference/statements/select/join.md) algorithm. +Specifies which [JOIN](../../sql-reference/statements/select/join.md) algorithm is used. Several algorithms can be specified, and an available one will be chosen for a particular query based on kind/strictness and table engine.
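For example, a minimal sketch of requesting a list of algorithms for a session (the tables `t1` and `t2` are illustrative; an available algorithm from the list is chosen per query):

```sql
-- Ask for grace_hash first; hash remains as a fallback for queries
-- whose kind/strictness grace_hash does not support.
SET join_algorithm = 'grace_hash,hash';

SELECT t1.id, t2.value
FROM t1
JOIN t2 ON t1.id = t2.id;
```

Here `grace_hash,hash` mirrors the `direct,hash` pattern already used by `default` below.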
Possible values: -- `default` — `hash` or `direct`, if possible (same as `direct,hash`) +### `default` -- `hash` — [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. +This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`). -- `parallel_hash` - a variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process. +### `grace_hash` + +[Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used. Grace hash is an algorithm option that provides performant complex joins while limiting memory use. + +The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of the key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way that ensures each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row is recalculated. Any rows that don’t belong to the current bucket are flushed and reassigned. + +### `hash` + +[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. + +### `parallel_hash` + +A variation of `hash` join that splits the data into buckets and concurrently builds several hash tables instead of one to speed up this process. When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM. -- `partial_merge` — a variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted. +### `partial_merge` + +A variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted. The `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). -When using `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks. +When using the `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by the `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks. -- `direct` - can be applied when the right storage supports key-value requests. +### `direct` + +This algorithm can be applied when the storage for the right table supports key-value requests.
The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md/#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only for the `LEFT` and `INNER` JOINs. -- `auto` — try `hash` join and switch on the fly to another algorithm if the memory limit is violated. +### `auto` -- `full_sorting_merge` — [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting joined tables before joining. +When set to `auto`, `hash` join is tried first, and the algorithm is switched on the fly to another algorithm if the memory limit is violated. -- `prefer_partial_merge` — ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`. +### `full_sorting_merge` + +[Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting of the joined tables before joining. + +### `prefer_partial_merge` + +ClickHouse always tries to use `partial_merge` join if possible; otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`. ## join_any_take_last_row {#settings-join_any_take_last_row} -Changes behaviour of join operations with `ANY` strictness. +Changes the behaviour of join operations with `ANY` strictness. :::warning This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables. ::: @@ -498,7 +520,7 @@ Default value: 65536. Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk. -The bigger the value of the setting, the more RAM used and the less disk I/O needed. +The bigger the value of the setting, the more RAM is used and the less disk I/O is needed. Possible values: @@ -514,12 +536,12 @@ Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations. Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour. ::: -When the legacy behaviour enabled: +When the legacy behaviour is enabled: - Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping. - Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do. -When the legacy behaviour disabled: +When the legacy behaviour is disabled: - Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations. - Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables. @@ -572,7 +594,7 @@ Default value: `163840`. ## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge-tree-min-rows-for-concurrent-read-for-remote-filesystem} -The minimum number of lines to read from one file before [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem. +The minimum number of lines to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from a remote filesystem.
Possible values: @@ -706,7 +728,7 @@ log_queries=1 ## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms} -If enabled (non-zero), queries faster then the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables: +If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables: - `system.query_log` - `system.query_thread_log` @@ -741,7 +763,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' Setting up query threads logging. -Query threads log into [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting have effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter. +Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has an effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter. Possible values: @@ -760,7 +782,7 @@ log_query_threads=1 Setting up query views logging. -When a query run by ClickHouse with this setup on has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter. +When a query run by ClickHouse with this setting enabled has associated views (materialized or live views), they are logged according to the rules in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter. Example: @@ -787,7 +809,7 @@ It can be used to improve the readability of server logs. Additionally, it helps Possible values: -- Any string no longer than [max_query_size](#settings-max_query_size). If length is exceeded, the server throws an exception. +- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception. Default value: empty string. @@ -821,11 +843,11 @@ The setting also does not have a purpose when using INSERT SELECT, since data is Default value: 1,048,576. -The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allow sorting more data in RAM. +The default is slightly more than `max_block_size`. The reason for this is that certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity.
Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM. ## min_insert_block_size_rows {#min-insert-block-size-rows} -Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. +Sets the minimum number of rows in the block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. Possible values: @@ -891,7 +913,7 @@ Higher values will lead to higher memory usage. ## max_compress_block_size {#max-compress-block-size} -The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. +The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to a slightly reduced compression ratio; the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. :::warning This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. ::: @@ -935,7 +957,7 @@ Default value: 1000. ## interactive_delay {#interactive-delay} -The interval in microseconds for checking whether request execution has been cancelled and sending the progress. +The interval in microseconds for checking whether request execution has been canceled and sending the progress. Default value: 100,000 (checks for cancelling and sends the progress ten times per second). @@ -4122,7 +4144,20 @@ Enabled by default. Serialize named tuple columns as JSON objects. -Disabled by default. +Enabled by default. + +### input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects} + +Parse named tuple columns as JSON objects. + +Enabled by default. + +### input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple} + +Insert default values for missing elements in a JSON object while parsing a named tuple. +This setting works only when the setting `input_format_json_named_tuples_as_objects` is enabled. + +Enabled by default. ### output_format_json_array_of_rows {#output_format_json_array_of_rows} diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 963ddfe7a02..a4fa5579638 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -120,5 +120,6 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
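As a hedged sketch of the same workflow (the file name and its structure below are hypothetical), a query inside a `clickhouse-local` session can read a local file directly through the `file` table function:

```sql
-- Hypothetical local CSV: give file() the format and structure explicitly,
-- then query it like any other table.
SELECT count(*)
FROM file('data.csv', 'CSV', 'id UInt32, name String');
```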
## Related Content +- [Extracting, converting, and querying data in local files using clickhouse-local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) - [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) - [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data) diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 575141766dd..bd8e72e0fec 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -57,6 +57,7 @@ ClickHouse-specific aggregate functions: - [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) - [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md) - [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md) - [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md) - [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md) @@ -77,4 +78,6 @@ ClickHouse-specific aggregate functions: - [contingency](./contingency.md) - [cramersV](./cramersv.md) - [cramersVBiasCorrected](./cramersvbiascorrected.md) -- [theilsU](./theilsu.md) \ No newline at end of file +- [theilsU](./theilsu.md) +- [maxIntersections](./maxintersections.md) +- [maxIntersectionsPosition](./maxintersectionsposition.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md new file mode 100644 index 00000000000..db99b900a3e --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md @@ -0,0 +1,64 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/maxintersections +sidebar_position: 360 +title: maxIntersections +--- + +# maxIntersections + +Aggregate function that calculates the maximum number of times that a group of intervals intersects each other (if all the intervals intersect at least once). + +The syntax is: + +```sql +maxIntersections(start_column, end_column) +``` + +**Arguments** + +- `start_column` – the numeric column that represents the start of each interval. If `start_column` is `NULL` or 0 then the interval will be skipped. + +- `end_column` – the numeric column that represents the end of each interval. If `end_column` is `NULL` or 0 then the interval will be skipped. + +**Returned value** + +Returns the maximum number of intersected intervals. + +**Example** + +```sql +CREATE TABLE my_events ( + start UInt32, + end UInt32 +) +Engine = MergeTree +ORDER BY tuple(); + +INSERT INTO my_events VALUES + (1, 3), + (1, 6), + (2, 5), + (3, 7); +``` + +The intervals look like the following: + +```response +1 - 3 +1 - - - - 6 + 2 - - 5 + 3 - - - 7 +``` + +Three of these intervals have a common value (the value is `4`, but the value that is common is not important; we are measuring the count of the intersections). The intervals `(1,3)` and `(3,7)` share an endpoint but are not considered intersecting by the `maxIntersections` function.
+ +```sql +SELECT maxIntersections(start, end) FROM my_events; +``` + +Response: +```response +3 +``` + +If you have multiple occurrences of the maximum interval, you can use the [`maxIntersectionsPosition` function](./maxintersectionsposition.md) to locate the number and location of those occurrences. \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md new file mode 100644 index 00000000000..7dd63f09316 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md @@ -0,0 +1,64 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/maxintersectionsposition +sidebar_position: 361 +title: maxIntersectionsPosition +--- + +# maxIntersectionsPosition + +Aggregate function that calculates the positions of the occurrences of the [`maxIntersections` function](./maxintersections.md). + +The syntax is: + +```sql +maxIntersectionsPosition(start_column, end_column) +``` + +**Arguments** + +- `start_column` – the numeric column that represents the start of each interval. If `start_column` is `NULL` or 0 then the interval will be skipped. + +- `end_column` – the numeric column that represents the end of each interval. If `end_column` is `NULL` or 0 then the interval will be skipped. + +**Returned value** + +Returns the start positions of the maximum number of intersected intervals. + +**Example** + +```sql +CREATE TABLE my_events ( + start UInt32, + end UInt32 +) +Engine = MergeTree +ORDER BY tuple(); + +INSERT INTO my_events VALUES + (1, 3), + (1, 6), + (2, 5), + (3, 7); +``` + +The intervals look like the following: + +```response +1 - 3 +1 - - - - 6 + 2 - - 5 + 3 - - - 7 +``` + +Notice that three of these intervals have the value 4 in common, and that the overlap starts with the 2nd interval: + +```sql +SELECT maxIntersectionsPosition(start, end) FROM my_events; +``` + +Response: +```response +2 +``` + +In other words, the `(1,6)` row is the start of the 3 intervals that intersect, and 3 is the maximum number of intervals that intersect. \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md new file mode 100644 index 00000000000..07fcd187217 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md @@ -0,0 +1,68 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/quantileInterpolatedWeighted +sidebar_position: 203 +--- + +# quantileInterpolatedWeighted + +Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using linear interpolation, taking into account the weight of each element. + +To get the interpolated value, all the passed values are combined into an array, which is then sorted by their corresponding weights. Quantile interpolation is then performed using the [weighted percentile method](https://en.wikipedia.org/wiki/Percentile#The_weighted_percentile_method) by building a cumulative distribution based on the weights; a linear interpolation is then performed using the weights and the values to compute the quantiles. + +When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could).
In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function. + +**Syntax** + +``` sql +quantileInterpolatedWeighted(level)(expr, weight) +``` + +Alias: `medianInterpolatedWeighted`. + +**Arguments** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. + +**Returned value** + +- Quantile of the specified level. + +Type: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. + +**Example** + +Input table: + +``` text +┌─n─┬─val─┐ +│ 0 │ 3 │ +│ 1 │ 2 │ +│ 2 │ 1 │ +│ 5 │ 4 │ +└───┴─────┘ +``` + +Query: + +``` sql +SELECT quantileInterpolatedWeighted(n, val) FROM t +``` + +Result: + +``` text +┌─quantileInterpolatedWeighted(n, val)─┐ +│ 1 │ +└──────────────────────────────────────┘ +``` + +**See Also** + +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) - [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 5c9120fb8f4..57151915336 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -9,7 +9,7 @@ sidebar_position: 201 Syntax: `quantiles(level1, level2, …)(x)` -All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. +All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. ## quantilesExactExclusive diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index ab1596b1760..d9099ba5ad3 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -6,6 +6,10 @@ sidebar_label: JSON # JSON +:::warning +This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/guides/developer/working-with-json/json-load-data.md) instead. +::: + Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`.
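A minimal sketch of the experimental type in action (the table name, the flag requirement, and the payload are illustrative and may change between releases):

```sql
-- The experimental Object('json') type must be enabled explicitly.
SET allow_experimental_object_type = 1;

CREATE TABLE json_demo (doc JSON) ENGINE = Memory;

INSERT INTO json_demo VALUES ('{"user": {"name": "alice", "visits": 3}}');

-- Nested values are addressed with dot notation.
SELECT doc.user.name, doc.user.visits FROM json_demo;
```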
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index c044b972754..9d2f89c1837 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -121,7 +121,7 @@ Accepts an empty array and returns a one-element array that is equal to the defa ## range(end), range(\[start, \] end \[, step\]) -Returns an array of `UInt` numbers from `start` to `end - 1` by `step`. +Returns an array of numbers from `start` to `end - 1` by `step`. The supported types are [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64](../data-types/int-uint.md). **Syntax** ``` sql range([start, ] end [, step]) ``` **Arguments** -- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0. [UInt](../data-types/int-uint.md) -- `end` — The number before which the array is constructed. Required. [UInt](../data-types/int-uint.md) -- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1. [UInt](../data-types/int-uint.md) +- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0. +- `end` — The number before which the array is constructed. Required. +- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1. **Returned value** -- Array of `UInt` numbers from `start` to `end - 1` by `step`. +- Array of numbers from `start` to `end - 1` by `step`. **Implementation details** -- All arguments must be positive values: `start`, `end`, `step` are `UInt` data types, as well as elements of the returned array. +- All arguments `start`, `end`, and `step` must be one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`. The elements of the returned array have the supertype of all the argument types. - An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. 
- **Examples** Query: ``` sql -SELECT range(5), range(1, 5), range(1, 5, 2); +SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2); ``` Result: ```txt -┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐ -│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ -└─────────────┴─────────────┴────────────────┘ +┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐ +│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ [-1,1,3] │ +└─────────────┴─────────────┴────────────────┴─────────────────┘ ``` ## array(x1, …), operator \[x1, …\] diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index dd56b47cd3a..380c8364090 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -39,3 +39,16 @@ SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64( │ [68] │ -67417.0770 │ ('2080-03-12 14:17:31.269','110425e5-413f-10a6-05ba-fa6b3e929f15') │ └──────────┴──────────────┴────────────────────────────────────────────────────────────────────┘ ``` + +```sql +CREATE TABLE random (a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)) engine=Memory; +INSERT INTO random SELECT * FROM generateRandom() LIMIT 2; +SELECT * FROM random; +``` + +```text +┌─a────────────────────────────┬────────────d─┬─c──────────────────────────────────────────────────────────────────┐ +│ [] │ 68091.8197 │ ('2037-10-02 12:44:23.368','039ecab7-81c2-45ee-208c-844e5c6c5652') │ +│ [8,-83,0,-22,65,9,-30,28,64] │ -186233.4909 │ ('2062-01-11 00:06:04.124','69563ea1-5ad1-f870-16d8-67061da0df25') │ +└──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index 565304710cc..d150b94b8af 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -117,7 +117,7 @@ SELECT notEmpty([1,2]); ## range(end), range(\[start, \] end \[, step\]) {#range} -返回一个以`step`作为增量步长的从`start`到`end - 1`的`UInt`类型数字数组。 +返回一个以`step`作为增量步长的从`start`到`end - 1`的整型数字数组,支持类型包括[`UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`](../data-types/int-uint.md)。 **语法** ``` sql range([start, ] end [, step]) ``` **参数** -- `start` — 数组的第一个元素。可选项,如果设置了`step`时同样需要`start`,默认值为:0,类型为[UInt](../data-types/int-uint.md)。 -- `end` — 计数到`end`结束,但不包括`end`,必填项,类型为[UInt](../data-types/int-uint.md)。 -- `step` — 确定数组中每个元素之间的增量步长。可选项,默认值为:1,类型为[UInt](../data-types/int-uint.md)。 +- `start` — 数组的第一个元素。可选项,如果设置了`step`时同样需要`start`,默认值为:0。 +- `end` — 计数到`end`结束,但不包括`end`,必填项。 +- `step` — 确定数组中每个元素之间的增量步长。可选项,默认值为:1。 **返回值** -- 以`step`作为增量步长的从`start`到`end - 1`的`UInt`类型数字数组。 +- 以`step`作为增量步长的从`start`到`end - 1`的数字数组。 **注意事项** -- 所有参数必须是正值:`start`、`end`、`step`,类型均为`UInt`,结果数组的元素与此相同。 +- 所有参数`start`、`end`、`step`必须属于以下几种类型之一:[`UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`](../data-types/int-uint.md)。结果数组的元素数据类型为所有入参类型的最小超类,也必须属于以上几种类型之一。 - 如果查询结果的数组总长度超过[function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block)指定的元素数,将会抛出异常。 - **示例** 查询语句: ``` sql -SELECT range(5), range(1, 5), range(1, 5, 2); +SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2); ``` 结果: ```txt -┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┐ -│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ 
-└─────────────┴─────────────┴────────────────┘ +┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐ +│ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ [-1,1,3] │ +└─────────────┴─────────────┴────────────────┴─────────────────┘ ``` ## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1} diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 742838d6433..419b80ccff2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -140,6 +140,7 @@ namespace CurrentMetrics namespace ProfileEvents { extern const Event MainConfigLoads; + extern const Event ServerStartupMilliseconds; } namespace fs = std::filesystem; @@ -652,6 +653,8 @@ static void sanityChecks(Server & server) int Server::main(const std::vector & /*args*/) try { + Stopwatch startup_watch; + Poco::Logger * log = &logger(); UseSSL use_ssl; @@ -1822,6 +1825,9 @@ try LOG_INFO(log, "Ready for connections."); } + startup_watch.stop(); + ProfileEvents::increment(ProfileEvents::ServerStartupMilliseconds, startup_watch.elapsedMilliseconds()); + try { global_context->startClusterDiscovery(); diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 366667410d5..f1f99fc9166 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -167,6 +167,7 @@ enum class AccessType M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \ M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \ M(SYSTEM_RESTORE_REPLICA, "RESTORE REPLICA", TABLE, SYSTEM) \ + M(SYSTEM_WAIT_LOADING_PARTS, "WAIT LOADING PARTS", TABLE, SYSTEM) \ M(SYSTEM_SYNC_DATABASE_REPLICA, "SYNC DATABASE REPLICA", DATABASE, SYSTEM) \ M(SYSTEM_SYNC_TRANSACTION_LOG, "SYNC TRANSACTION LOG", GLOBAL, SYSTEM) \ M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \ diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index 02aafb7415b..e21ebda2a31 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -53,7 +53,7 @@ TEST(AccessRights, Union) "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " "SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " - "SYSTEM RESTORE REPLICA, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); + "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); } diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index c559b3f115f..ac81f7466fa 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -207,7 +207,7 @@ private: { // Fuse points if their text representations differ only in last digit auto min_diff = 10 * (points[left].mean + points[right].mean) * std::numeric_limits::epsilon(); - if (points[left].mean + min_diff >= points[right].mean) + if (points[left].mean + std::fabs(min_diff) >= points[right].mean) { points[left] = points[left] + points[right]; } diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 6427d03f089..49157acf690 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -232,6 +232,9 @@ struct NameQuantilesExactInclusive { static constexpr auto name = "quantilesExac struct 
NameQuantileExactWeighted { static constexpr auto name = "quantileExactWeighted"; }; struct NameQuantilesExactWeighted { static constexpr auto name = "quantilesExactWeighted"; }; +struct NameQuantileInterpolatedWeighted { static constexpr auto name = "quantileInterpolatedWeighted"; }; +struct NameQuantilesInterpolatedWeighted { static constexpr auto name = "quantilesInterpolatedWeighted"; }; + struct NameQuantileTiming { static constexpr auto name = "quantileTiming"; }; struct NameQuantileTimingWeighted { static constexpr auto name = "quantileTimingWeighted"; }; struct NameQuantilesTiming { static constexpr auto name = "quantilesTiming"; }; diff --git a/src/AggregateFunctions/AggregateFunctionQuantileInterpolatedWeighted.cpp b/src/AggregateFunctions/AggregateFunctionQuantileInterpolatedWeighted.cpp new file mode 100644 index 00000000000..68b42376df7 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionQuantileInterpolatedWeighted.cpp @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +struct Settings; + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + + template using FuncQuantileInterpolatedWeighted = AggregateFunctionQuantile, NameQuantileInterpolatedWeighted, true, void, false>; + template using FuncQuantilesInterpolatedWeighted = AggregateFunctionQuantile, NameQuantilesInterpolatedWeighted, true, void, true>; + + template