mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge pull request #70394 from alsugiliazova/Fix--Distinct-combinator-docs
Docs: Fix distinct combinator docs as well as some other minor fixes in docs
This commit is contained in:
commit
e9a1ac5ff4
@ -82,7 +82,7 @@ cd ./utils/check-style
|
||||
# Check duplicate includes
|
||||
./check-duplicate-includes.sh
|
||||
|
||||
# Check c++ formatiing
|
||||
# Check c++ formatting
|
||||
./check-style
|
||||
|
||||
# Check python formatting with black
|
||||
|
@ -374,15 +374,15 @@ Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to
|
||||
```sql
|
||||
CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster]
|
||||
AS
|
||||
(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2));
|
||||
(total_number_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_number_of_all_grams) * log(2));
|
||||
|
||||
CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster]
|
||||
AS
|
||||
(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));
|
||||
(total_number_of_all_grams, probability_of_false_positives) -> ceil((total_number_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));
|
||||
|
||||
CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster]
|
||||
AS
|
||||
(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions);
|
||||
(total_number_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_number_of_all_grams)), number_of_hash_functions);
|
||||
|
||||
CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster]
|
||||
AS
|
||||
|
@ -35,7 +35,7 @@ Engine parameters:
|
||||
|
||||
- `root_path` - ZooKeeper path where the `table_name` will be stored.
|
||||
This path should not contain the prefix defined by `<keeper_map_path_prefix>` config because the prefix will be automatically appended to the `root_path`.
|
||||
Additionally, format of `auxiliary_zookeper_cluster_name:/some/path` is also supported where `auxiliary_zookeper_cluster` is a ZooKeeper cluster defined inside `<auxiliary_zookeepers>` config.
|
||||
Additionally, the format `auxiliary_zookeeper_cluster_name:/some/path` is also supported, where `auxiliary_zookeeper_cluster_name` is the name of a ZooKeeper cluster defined inside the `<auxiliary_zookeepers>` config.
|
||||
By default, ZooKeeper cluster defined inside `<zookeeper>` config is used.
|
||||
- `keys_limit` - number of keys allowed inside the table.
|
||||
This limit is a soft limit, and in some edge cases more keys may end up in the table.
|
||||
|
@ -877,7 +877,7 @@ INSERT INTO json_as_object (json) FORMAT JSONAsObject {"any json stucture":1}
|
||||
SELECT time, json FROM json_as_object FORMAT JSONEachRow
|
||||
```
|
||||
|
||||
```resonse
|
||||
```response
|
||||
{"time":"2024-09-16 12:18:10","json":{}}
|
||||
{"time":"2024-09-16 12:18:13","json":{"any json stucture":"1"}}
|
||||
{"time":"2024-09-16 12:18:08","json":{"foo":{"bar":{"x":"y"},"baz":"1"}}}
|
||||
|
@ -509,7 +509,7 @@ DESC format(JSONEachRow, $$
|
||||
{"value" : "424242424242"}
|
||||
$$)
|
||||
```
|
||||
```reponse
|
||||
```response
|
||||
┌─name──┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ value │ Nullable(Int64) │ │ │ │ │ │
|
||||
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
@ -910,9 +910,9 @@ This setting is disabled by default.
|
||||
|
||||
```sql
|
||||
SET input_format_csv_try_infer_numbers_from_strings = 1;
|
||||
DESC format(CSV, '"42","42.42"');
|
||||
DESC format(CSV, '42,42.42');
|
||||
```
|
||||
```reponse
|
||||
```response
|
||||
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ c1 │ Nullable(Int64) │ │ │ │ │ │
|
||||
│ c2 │ Nullable(Float64) │ │ │ │ │ │
|
||||
|
@ -124,7 +124,7 @@ Converts an aggregate function for tables into an aggregate function for arrays
|
||||
## -Distinct
|
||||
|
||||
Every unique combination of arguments will be aggregated only once. Repeating values are ignored.
|
||||
Examples: `sum(DISTINCT x)`, `groupArray(DISTINCT x)`, `corrStableDistinct(DISTINCT x, y)` and so on.
|
||||
Examples: `sum(DISTINCT x)` (or `sumDistinct(x)`), `groupArray(DISTINCT x)` (or `groupArrayDistinct(x)`), `corrStable(DISTINCT x, y)` (or `corrStableDistinct(x, y)`) and so on.
|
||||
|
||||
## -OrDefault
|
||||
|
||||
|
@ -86,7 +86,7 @@ The table below describes how different interval kinds of `Interval` data type a
|
||||
|
||||
### Aggregate function parameter binary encoding
|
||||
|
||||
The table below describes how parameters of `AggragateFunction` and `SimpleAggregateFunction` are encoded.
|
||||
The table below describes how parameters of `AggregateFunction` and `SimpleAggregateFunction` are encoded.
|
||||
The encoding of a parameter consists of 1 byte indicating the type of the parameter and the value itself.
|
||||
|
||||
| Parameter type | Binary encoding |
|
||||
@ -106,7 +106,7 @@ The encoding of a parameter consists of 1 byte indicating the type of the parame
|
||||
| `String` | `0x0C<var_uint_size><data>` |
|
||||
| `Array` | `0x0D<var_uint_size><value_encoding_1>...<value_encoding_N>` |
|
||||
| `Tuple` | `0x0E<var_uint_size><value_encoding_1>...<value_encoding_N>` |
|
||||
| `Map` | `0x0F<var_uint_size><key_encoding_1><value_encoding_1>...<key_endoding_N><value_encoding_N>` |
|
||||
| `Map` | `0x0F<var_uint_size><key_encoding_1><value_encoding_1>...<key_encoding_N><value_encoding_N>` |
|
||||
| `IPv4` | `0x10<uint32_little_endian_value>` |
|
||||
| `IPv6` | `0x11<uint128_little_endian_value>` |
|
||||
| `UUID` | `0x12<uuid_value>` |
|
||||
|
@ -41,7 +41,7 @@ ORDER BY ts, event_type;
|
||||
│ 2020-01-02 00:00:00 │ imp │ 2 │
|
||||
└─────────────────────┴────────────┴─────────────────┘
|
||||
|
||||
-- Let's add the new measurment `cost`
|
||||
-- Let's add the new measurement `cost`
|
||||
-- and the new dimension `browser`.
|
||||
|
||||
ALTER TABLE events
|
||||
|
@ -46,7 +46,7 @@ The `CHECK TABLE` query supports the following table engines:
|
||||
- [StripeLog](../../engines/table-engines/log-family/stripelog.md)
|
||||
- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
|
||||
|
||||
Performed over the tables with another table engines causes an `NOT_IMPLEMETED` exception.
|
||||
If performed over tables with other table engines, the query causes a `NOT_IMPLEMENTED` exception.
|
||||
|
||||
Engines from the `*Log` family do not provide automatic data recovery on failure. Use the `CHECK TABLE` query to track data loss in a timely manner.
|
||||
|
||||
|
@ -442,7 +442,7 @@ DEFLATE_QPL is not available in ClickHouse Cloud.
|
||||
|
||||
### Specialized Codecs
|
||||
|
||||
These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs do not compress data themself, they instead preprocess the data such that a second compression stage using a general-purpose codec can achieve a higher data compression rate.
|
||||
These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs do not compress data themselves; instead, they preprocess the data such that a second compression stage using a general-purpose codec can achieve a higher data compression rate.
|
||||
|
||||
#### Delta
|
||||
|
||||
|
@ -194,7 +194,7 @@ REFRESH EVERY 1 MONTH OFFSET 5 DAY 2 HOUR -- on 6th day of every month, at 2:00
|
||||
REFRESH EVERY 2 WEEK OFFSET 5 DAY 15 HOUR 10 MINUTE -- every other Saturday, at 3:10 pm
|
||||
REFRESH EVERY 30 MINUTE -- at 00:00, 00:30, 01:00, 01:30, etc
|
||||
REFRESH AFTER 30 MINUTE -- 30 minutes after the previous refresh completes, no alignment with time of day
|
||||
-- REFRESH AFTER 1 HOUR OFFSET 1 MINUTE -- syntax errror, OFFSET is not allowed with AFTER
|
||||
-- REFRESH AFTER 1 HOUR OFFSET 1 MINUTE -- syntax error, OFFSET is not allowed with AFTER
|
||||
REFRESH EVERY 1 WEEK 2 DAYS -- every 9 days, not on any particular day of the week or month;
|
||||
-- specifically, when day number (since 1969-12-29) is divisible by 9
|
||||
REFRESH EVERY 5 MONTHS -- every 5 months, different months each year (as 12 is not divisible by 5);
|
||||
|
@ -93,7 +93,7 @@ WITH anySimpleState(number) AS c SELECT toTypeName(c), c FROM numbers(1);
|
||||
## -Distinct {#agg-functions-combinator-distinct}
|
||||
|
||||
При наличии комбинатора Distinct каждое уникальное значение аргументов будет учтено в агрегатной функции только один раз.
|
||||
Примеры: `sum(DISTINCT x)`, `groupArray(DISTINCT x)`, `corrStableDistinct(DISTINCT x, y)` и т.п.
|
||||
Примеры: `sum(DISTINCT x)` (или `sumDistinct(x)`), `groupArray(DISTINCT x)` (или `groupArrayDistinct(x)`), `corrStable(DISTINCT x, y)` (или `corrStableDistinct(x, y)`) и т.п.
|
||||
|
||||
## -OrDefault {#agg-functions-combinator-ordefault}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user