Merge pull request #70394 from alsugiliazova/Fix--Distinct-combinator-docs

Docs: Fix distinct combinator docs as well as some other minor fixes in docs
This commit is contained in:
Dmitry Novik 2024-10-10 02:41:12 +00:00 committed by GitHub
commit e9a1ac5ff4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 17 additions and 17 deletions

View File

@ -82,7 +82,7 @@ cd ./utils/check-style
# Check duplicate includes # Check duplicate includes
./check-duplicate-includes.sh ./check-duplicate-includes.sh
# Check c++ formatiing # Check c++ formatting
./check-style ./check-style
# Check python formatting with black # Check python formatting with black

View File

@ -374,15 +374,15 @@ Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to
```sql ```sql
CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster] CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster]
AS AS
(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2)); (total_number_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_number_of_all_grams) * log(2));
CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster] CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster]
AS AS
(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2)))); (total_number_of_all_grams, probability_of_false_positives) -> ceil((total_number_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));
CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster] CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster]
AS AS
(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions); (total_number_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_number_of_all_grams)), number_of_hash_functions);
CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster] CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster]
AS AS

View File

@ -35,7 +35,7 @@ Engine parameters:
- `root_path` - ZooKeeper path where the `table_name` will be stored. - `root_path` - ZooKeeper path where the `table_name` will be stored.
This path should not contain the prefix defined by `<keeper_map_path_prefix>` config because the prefix will be automatically appended to the `root_path`. This path should not contain the prefix defined by `<keeper_map_path_prefix>` config because the prefix will be automatically appended to the `root_path`.
Additionally, format of `auxiliary_zookeper_cluster_name:/some/path` is also supported where `auxiliary_zookeper_cluster` is a ZooKeeper cluster defined inside `<auxiliary_zookeepers>` config. Additionally, format of `auxiliary_zookeeper_cluster_name:/some/path` is also supported where `auxiliary_zookeeper_cluster` is a ZooKeeper cluster defined inside `<auxiliary_zookeepers>` config.
By default, ZooKeeper cluster defined inside `<zookeeper>` config is used. By default, ZooKeeper cluster defined inside `<zookeeper>` config is used.
- `keys_limit` - number of keys allowed inside the table. - `keys_limit` - number of keys allowed inside the table.
This limit is a soft limit, and in some edge cases more keys may end up in the table. This limit is a soft limit, and in some edge cases more keys may end up in the table.

View File

@ -877,7 +877,7 @@ INSERT INTO json_as_object (json) FORMAT JSONAsObject {"any json stucture":1}
SELECT time, json FROM json_as_object FORMAT JSONEachRow SELECT time, json FROM json_as_object FORMAT JSONEachRow
``` ```
```resonse ```response
{"time":"2024-09-16 12:18:10","json":{}} {"time":"2024-09-16 12:18:10","json":{}}
{"time":"2024-09-16 12:18:13","json":{"any json stucture":"1"}} {"time":"2024-09-16 12:18:13","json":{"any json stucture":"1"}}
{"time":"2024-09-16 12:18:08","json":{"foo":{"bar":{"x":"y"},"baz":"1"}}} {"time":"2024-09-16 12:18:08","json":{"foo":{"bar":{"x":"y"},"baz":"1"}}}

View File

@ -509,7 +509,7 @@ DESC format(JSONEachRow, $$
{"value" : "424242424242"} {"value" : "424242424242"}
$$) $$)
``` ```
```reponse ```response
┌─name──┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ ┌─name──┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ value │ Nullable(Int64) │ │ │ │ │ │ │ value │ Nullable(Int64) │ │ │ │ │ │
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ └───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
@ -910,9 +910,9 @@ This setting is disabled by default.
```sql ```sql
SET input_format_json_try_infer_numbers_from_strings = 1; SET input_format_json_try_infer_numbers_from_strings = 1;
DESC format(CSV, '"42","42.42"'); DESC format(CSV, '42,42.42');
``` ```
```reponse ```response
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ ┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1 │ Nullable(Int64) │ │ │ │ │ │ │ c1 │ Nullable(Int64) │ │ │ │ │ │
│ c2 │ Nullable(Float64) │ │ │ │ │ │ │ c2 │ Nullable(Float64) │ │ │ │ │ │

View File

@ -124,7 +124,7 @@ Converts an aggregate function for tables into an aggregate function for arrays
## -Distinct ## -Distinct
Every unique combination of arguments will be aggregated only once. Repeating values are ignored. Every unique combination of arguments will be aggregated only once. Repeating values are ignored.
Examples: `sum(DISTINCT x)`, `groupArray(DISTINCT x)`, `corrStableDistinct(DISTINCT x, y)` and so on. Examples: `sum(DISTINCT x)` (or `sumDistinct(x)`), `groupArray(DISTINCT x)` (or `groupArrayDistinct(x)`), `corrStable(DISTINCT x, y)` (or `corrStableDistinct(x, y)`) and so on.
## -OrDefault ## -OrDefault

View File

@ -86,7 +86,7 @@ The table below describes how different interval kinds of `Interval` data type a
### Aggregate function parameter binary encoding ### Aggregate function parameter binary encoding
The table below describes how parameters of `AggragateFunction` and `SimpleAggregateFunction` are encoded. The table below describes how parameters of `AggregateFunction` and `SimpleAggregateFunction` are encoded.
The encoding of a parameter consists of 1 byte indicating the type of the parameter and the value itself. The encoding of a parameter consists of 1 byte indicating the type of the parameter and the value itself.
| Parameter type | Binary encoding | | Parameter type | Binary encoding |
@ -106,7 +106,7 @@ The encoding of a parameter consists of 1 byte indicating the type of the parame
| `String` | `0x0C<var_uint_size><data>` | | `String` | `0x0C<var_uint_size><data>` |
| `Array` | `0x0D<var_uint_size><value_encoding_1>...<value_encoding_N>` | | `Array` | `0x0D<var_uint_size><value_encoding_1>...<value_encoding_N>` |
| `Tuple` | `0x0E<var_uint_size><value_encoding_1>...<value_encoding_N>` | | `Tuple` | `0x0E<var_uint_size><value_encoding_1>...<value_encoding_N>` |
| `Map` | `0x0F<var_uint_size><key_encoding_1><value_encoding_1>...<key_endoding_N><value_encoding_N>` | | `Map` | `0x0F<var_uint_size><key_encoding_1><value_encoding_1>...<key_encoding_N><value_encoding_N>` |
| `IPv4` | `0x10<uint32_little_endian_value>` | | `IPv4` | `0x10<uint32_little_endian_value>` |
| `IPv6` | `0x11<uint128_little_endian_value>` | | `IPv6` | `0x11<uint128_little_endian_value>` |
| `UUID` | `0x12<uuid_value>` | | `UUID` | `0x12<uuid_value>` |

View File

@ -41,7 +41,7 @@ ORDER BY ts, event_type;
│ 2020-01-02 00:00:00 │ imp │ 2 │ │ 2020-01-02 00:00:00 │ imp │ 2 │
└─────────────────────┴────────────┴─────────────────┘ └─────────────────────┴────────────┴─────────────────┘
-- Let's add the new measurment `cost` -- Let's add the new measurement `cost`
-- and the new dimension `browser`. -- and the new dimension `browser`.
ALTER TABLE events ALTER TABLE events

View File

@ -46,7 +46,7 @@ The `CHECK TABLE` query supports the following table engines:
- [StripeLog](../../engines/table-engines/log-family/stripelog.md) - [StripeLog](../../engines/table-engines/log-family/stripelog.md)
- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) - [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
Running the query on tables with other table engines causes a `NOT_IMPLEMETED` exception. Running the query on tables with other table engines causes a `NOT_IMPLEMENTED` exception.
Engines from the `*Log` family do not provide automatic data recovery on failure. Use the `CHECK TABLE` query to track data loss in a timely manner. Engines from the `*Log` family do not provide automatic data recovery on failure. Use the `CHECK TABLE` query to track data loss in a timely manner.

View File

@ -442,7 +442,7 @@ DEFLATE_QPL is not available in ClickHouse Cloud.
### Specialized Codecs ### Specialized Codecs
These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs do not compress data themself, they instead preprocess the data such that a second compression stage using a general-purpose codec can achieve a higher data compression rate. These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs do not compress data themselves, they instead preprocess the data such that a second compression stage using a general-purpose codec can achieve a higher data compression rate.
#### Delta #### Delta

View File

@ -194,7 +194,7 @@ REFRESH EVERY 1 MONTH OFFSET 5 DAY 2 HOUR -- on 6th day of every month, at 2:00
REFRESH EVERY 2 WEEK OFFSET 5 DAY 15 HOUR 10 MINUTE -- every other Saturday, at 3:10 pm REFRESH EVERY 2 WEEK OFFSET 5 DAY 15 HOUR 10 MINUTE -- every other Saturday, at 3:10 pm
REFRESH EVERY 30 MINUTE -- at 00:00, 00:30, 01:00, 01:30, etc REFRESH EVERY 30 MINUTE -- at 00:00, 00:30, 01:00, 01:30, etc
REFRESH AFTER 30 MINUTE -- 30 minutes after the previous refresh completes, no alignment with time of day REFRESH AFTER 30 MINUTE -- 30 minutes after the previous refresh completes, no alignment with time of day
-- REFRESH AFTER 1 HOUR OFFSET 1 MINUTE -- syntax errror, OFFSET is not allowed with AFTER -- REFRESH AFTER 1 HOUR OFFSET 1 MINUTE -- syntax error, OFFSET is not allowed with AFTER
REFRESH EVERY 1 WEEK 2 DAYS -- every 9 days, not on any particular day of the week or month; REFRESH EVERY 1 WEEK 2 DAYS -- every 9 days, not on any particular day of the week or month;
-- specifically, when day number (since 1969-12-29) is divisible by 9 -- specifically, when day number (since 1969-12-29) is divisible by 9
REFRESH EVERY 5 MONTHS -- every 5 months, different months each year (as 12 is not divisible by 5); REFRESH EVERY 5 MONTHS -- every 5 months, different months each year (as 12 is not divisible by 5);

View File

@ -93,7 +93,7 @@ WITH anySimpleState(number) AS c SELECT toTypeName(c), c FROM numbers(1);
## -Distinct {#agg-functions-combinator-distinct} ## -Distinct {#agg-functions-combinator-distinct}
При наличии комбинатора Distinct каждое уникальное значение аргументов будет учтено в агрегатной функции только один раз. При наличии комбинатора Distinct каждое уникальное значение аргументов будет учтено в агрегатной функции только один раз.
Примеры: `sum(DISTINCT x)`, `groupArray(DISTINCT x)`, `corrStableDistinct(DISTINCT x, y)` и т.п. Примеры: `sum(DISTINCT x)` (или `sumDistinct(x)`), `groupArray(DISTINCT x)` (или `groupArrayDistinct(x)`), `corrStable(DISTINCT x, y)` (или `corrStableDistinct(x, y)`) и т.п.
## -OrDefault {#agg-functions-combinator-ordefault} ## -OrDefault {#agg-functions-combinator-ordefault}